Exploratory Text Analytics Final Project

  • Arti Patel ()
  • Sudeepti Surapaneni(ss9ud@virginia.edu)
  • Adonis Lu ()
  • DS 5001
  • 29 April 2020
Deliverables 1) A collection of source files compressed in an archive (e.g., zip or tar.gz) and hosted on your UVA Box account. A manifest file describing those sources files, including their: Provenance: Where did they come from? Describe the website or other source and provide relevant URLs. Location: Provide a link to the source files in UVA Box. Description: What is the general subject matter of the corpus? Format: A description of both the file formats of the source files, e.g., plaintext, XML, CSV, etc., and the internal structure where applicable. For example, if XML then specify document type (e.g., TEI or XHTML).

Import

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
%matplotlib inline
from glob import glob
import re
import nltk
import plotly_express as px
# testing with one file first epub_file = "ALCOTT_MAY_LOUISA_AN_OLD-FASHIONED_GIRL-pg2787.txt" epub_file2 = "ALCOTT_MAY_LOUISA_EIGHT_COUSINS-pg2726.txt" epub_file3 = "ALCOTT_MAY_LOUISA_JACK_AND_JILL-pg2786.txt" epub_file4 = "ALCOTT_MAY_LOUISA_LITTLE_WOMEN-pg514.txt" csv_file1 = 'alcott_ofg.csv' csv_file2 = 'alcott_8cos.csv' csv_file3 = 'alcott_jj.csv' csv_file4 = 'alcott_lw.csv'OHCO = ['chap_num', 'para_num', 'sent_num', 'token_num']import os os.getcwd()

Import file into a dataframe

#epubs = '/Users/su/Desktop/Text_Analytics_Final/epubs' epub = open(epub_file, 'r', encoding='utf-8-sig').readlines() epub2 = open(epub_file2, 'r', encoding='utf-8-sig').readlines() epub3 = open(epub_file3, 'r', encoding='utf-8-sig').readlines() epub4 = open(epub_file4, 'r', encoding='utf-8-sig').readlines()df = pd.DataFrame(epub, columns=['line_str']) df2 = pd.DataFrame(epub2, columns=['line_str']) df3 = pd.DataFrame(epub3, columns=['line_str']) df4 = pd.DataFrame(epub4, columns=['line_str']) df.index.name = 'line_num' df2.index.name = 'line_num' df3.index.name = 'line_num' df4.index.name = 'line_num' df.line_str = df.line_str.str.strip() df2.line_str = df2.line_str.str.strip() df3.line_str = df3.line_str.str.strip() df4.line_str = df4.line_str.str.strip() df.sample(10) df2.sample(10) df3.sample(10) df4.sample(10)

Extract title of work from first line

title = df.loc[0].line_str.replace('The Project Gutenberg EBook of ', '')title2 = df2.loc[0].line_str.replace('The Project Gutenberg EBook of ', '')title3 = df3.loc[0].line_str.replace('The Project Gutenberg EBook of ', '')title4 = df4.loc[0].line_str.replace('The Project Gutenberg EBook of ', '')titletitle2title3title4df['title'] = titledf2['title'] = title2df3['title'] = title3df4['title'] = title4df.head()df2.head()df3.head()df4.head()# Remove Gutenberg's front and back mattera = df.line_str.str.match(r"\*\*\*\s*START OF (THE|THIS) PROJECT") b = df.line_str.str.match(r"\*\*\*\s*END OF (THE|THIS) PROJECT") a2 = df2.line_str.str.match(r"\*\*\*\s*START OF (THE|THIS) PROJECT") b2 = df2.line_str.str.match(r"\*\*\*\s*END OF (THE|THIS) PROJECT") a3 = df3.line_str.str.match(r"\*\*\*\s*START OF (THE|THIS) PROJECT") b3 = df3.line_str.str.match(r"\*\*\*\s*END OF (THE|THIS) PROJECT") a4 = df4.line_str.str.match(r"\*\*\*\s*START OF (THE|THIS) PROJECT") b4 = df4.line_str.str.match(r"\*\*\*\s*END OF (THE|THIS) PROJECT")an = df.loc[a].index[0] bn = df.loc[b].index[0] an2 = df2.loc[a2].index[0] bn2 = df2.loc[b2].index[0] an3 = df3.loc[a3].index[0] bn3 = df3.loc[b3].index[0] an4 = df4.loc[a4].index[0] bn4 = df4.loc[b4].index[0] an, bndf.loc[an].line_strdf2.loc[an2].line_str df3.loc[an3].line_str df4.loc[an4].line_strdf.loc[bn].line_str df2.loc[bn2].line_str df3.loc[bn3].line_str df4.loc[bn4].line_str## Take a slice between a and bdf = df.loc[an + 1 : bn - 1]df2 = df2.loc[an2 + 1 : bn2 - 1] df3 = df3.loc[an3 + 1 : bn3 - 1] df4 = df4.loc[an4 + 1 : bn4 - 1]df.head()df.tail()# Chunk by chapter## Find all chapter headerschap_lines = df.line_str.str.match(r"^\s*(Chapter|CHAPTER)\s+(\d+)", case=False) chap_lines2 = df2.line_str.str.match(r"^\s*(Chapter|CHAPTER)\s+(\d+)", case=False) chap_lines3 = df3.line_str.str.match(r"^\s*(Chapter|CHAPTER)\s+(\d+)", case=False) chap_lines4 = df4.line_str.str.match(r"^\s*(Chapter|CHAPTER)\s+(\d+)", case=False)df.loc[chap_lines] df2.loc[chap_lines2] df3.loc[chap_lines3] df4.loc[chap_lines4]## Assign numbers to chaptersdf.loc[chap_lines, 'chap_num'] = [i+1 for i in range(df.loc[chap_lines].shape[0])] df2.loc[chap_lines2, 'chap_num'] = [i+1 for i in range(df2.loc[chap_lines2].shape[0])] df3.loc[chap_lines3, 'chap_num'] = [i+1 for i in range(df3.loc[chap_lines3].shape[0])] df4.loc[chap_lines4, 'chap_num'] = [i+1 for i in range(df4.loc[chap_lines4].shape[0])]df.loc[chap_lines] df2.loc[chap_lines2] df3.loc[chap_lines3] df4.loc[chap_lines4]df3.sample(4)## Forward-fill chapter numbers to following text linesdf.chap_num = df.chap_num.ffill()df.sample(10)## Clean updf = df.loc[~df.chap_num.isna()] # Remove everything before Chapter 1df = df.loc[~chap_lines] # Remove chapter heading linesdf.chap_num = df.chap_num.astype('int') # Convert chap_num from float to intdf.head(10)## Group lines by chapter num OHCO[:1]dfc = df.groupby(OHCO[:1]).line_str.apply(lambda x: '\n'.join(x)).to_frame() # Make big string for each chapterdfc.head()# Split chapters into paragraphs dfp = dfc['line_str'].str.split(r'\n\n+', expand=True).stack()\ .to_frame().rename(columns={0:'para_str'})# dfc['line_str'].str.split(r'\n\n+', expand=True)# dfc['line_str'].str.split(r'\n\n+', expand=True).stack()dfp.head()dfp.index.names = OHCO[:2]dfp.head()dfp['para_str'] = dfp['para_str'].str.replace(r'\n', ' ').str.strip() # Remove newlinesdfp = dfp[~dfp['para_str'].str.match(r'^\s*$')] # Remove empty paragraphsdfp.head()# Split paragraphs into sentences NOTE: ADDED `"` to regex in `split()`dfs = dfp['para_str'].str.split(r'[.?!;:"]+', expand=True).stack()\ .to_frame().rename(columns={0:'sent_str'})dfs.index.names = OHCO[:3]dfs = dfs[~dfs['sent_str'].str.match(r'^\s*$')] # Remove empty paragraphsdfs.head(10)# Split sentences into tokensdft = dfs['sent_str'].str.split(r"[\s',-]+", expand=True).stack()\ .to_frame().rename(columns={0:'token_str'})dft.index.names = OHCO[:4]dft.head(10)# Gathering by Content Objectsents = dft.groupby(OHCO[:3]).token_str.apply(lambda x: ' '.join(x)).to_frame().rename(columns={'token_str':'content'}) paras = dft.groupby(OHCO[:2]).token_str.apply(lambda x: ' '.join(x)).to_frame().rename(columns={'token_str':'content'}) chaps = dft.groupby(OHCO[:1]).token_str.apply(lambda x: ' '.join(x)).to_frame().rename(columns={'token_str':'content'})def gather(ohco_level): return df.groupby(OHCO[:ohco_level]).token_str\ .apply(lambda x: ' '.join(x))\ .to_frame()\ .rename(columns={'token_str':'content'})sents.sample(10)# Visualize some things## Token lengthdft.token_str.str.len().plot.hist(bins=10, title="Tokens");## Sentence lengthsents.content.str.len().plot.hist(bins=20, title='Sentences');## Paragraph lengthparas.content.str.len().plot.hist(title="Paragraphs");## Chapter lengthchaps.content.str.len().plot.hist(title="Chapters");# Save work to CSVdft.to_csv(csv_file)OHCO = ['book_id', 'chap_num', 'para_num', 'sent_num', 'token_num'] text_file1 = 'austen-persuasion.csv' text_file2 = 'austen-sense.csv'text1 = pd.read_csv(text_file1) text2 = pd.read_csv(text_file2)text1['script_id'] = 1 text2['script_id'] = 2tokens = pd.concat([text1, text2]).dropna()tokens = tokens.set_index(OHCO)tokens.head()tokens['term_str'] = tokens['token_str'].str.lower().str.replace(r'[\W_]', '')# Create a vocabularytokens.head()vocab = tokens['term_str'].value_counts()\ .to_frame()\ .reset_index()\ .rename(columns={'term_str':'n', 'index':'term_str'})\ .sort_values('term_str') vocab.index.name = 'term_id'vocab.head()vocab.sample(5)Specifically, you should produce the following tables as data frames and save them as CSV tables: A library table (LIBRARY) with basic metadata about each book. The raw book title will be sufficient, i.e. with title and author combined. The path of the source file. A document table (DOC) with the preserved paragraphs of each book and an appropriate OHCO index. A token table (TOKEN) with an appropriate OHCO index the following annotations derived from NLTK: Part-of-speech tags. A vocabulary (VOCAB) table of terms with the following annotations derived from NLTK: Stopwords. Porter stems.
In [2]:
# CONFIG 

OHCO = ['book_id', 'chap_num', 'para_num', 'sent_num', 'token_num']
epub_dir = 'epubs_'

Inspect

Since Project Gutenberg texts vary widely in their markup, we define our chunking patterns by hand.

roman = '[IVXLCM]+' caps = "[A-Z';, -]+" chap_pats = { 507: { 'start_line': 20, 'end_line': 16261, 'volume': re.compile('^\sVOLUME\s+{}\s$'.format(roman)), 'chapter': re.compile('^Chapter\s+{}'.format(roman)) }, 6688: { 'start_line': 22, 'end_line': 2556, 'chapter': re.compile('^Chapter\s+{}.*'.format(roman)) }, 145: { 'start_line': 205, 'end_line': 33310, 'chapter': re.compile('^CHAPTER\s+{}'.format(roman)) } }

In [3]:
roman = '[IVXLCM]+'
caps = "[A-Z';, -]+"
chap_pats = {
   

    2787: {
        'start_line': 21,
        'end_line': 11235,
        #'chapter': re.compile("^\s*CHAPTER\s+{}\.\s*$".format(roman))
        'chapter': re.compile("^\s*CHAPTER\s+{}\. .*$".format(roman))
    },
    
    
     2726: {
        'start_line': 21,
        'end_line': 11235,
        #'chapter': re.compile("^\s*CHAPTER\s+{}\.\s*$".format(roman))
        'chapter': re.compile("^Chapter\s+\d+.+$")
    },
    
      2786: {
        'start_line': 21,
        'end_line': 11235,
        'chapter': re.compile("^\s*Chapter\s+{}\. .*$".format(roman))
    },
    
    514: {
        'start_line': 21,
        'end_line': 11235,
        'chapter': re.compile('^CHAPTER+\s*{}\s*$'.format(caps))
    },
    
    
    3499: {
        'start_line': 21,
        'end_line': 10256,
        'chapter': re.compile("^Chapter\s+\d+.+$")
        
    },
    
    
     2788: {
        'start_line': 21,
        'end_line': 11620,
        #'chapter': re.compile('^CHAPTER+\s*{}\s*$'.format(roman))
         'chapter': re.compile("^\s*CHAPTER\s+{}\. .*$".format(roman))
    },
    
    
      2804: {
        'start_line': 21,
        'end_line': 11235,
        'chapter': re.compile("^Chapter\s+\d+.+$")
    },
    
    
     3795: {
        'start_line': 21,
        'end_line': 11235,
        'chapter': re.compile('^CHAPTER+\s*{}\s*$'.format(roman))
    }
 
}
    

Register and Chunk

In [4]:
def acquire_epubs(epub_list, chap_pats, OHCO=OHCO):
    
    my_lib = []
    my_doc = []

    for epub_file in epub_list:
        
        # Get PG ID from filename
        book_id = int(epub_file.split('-')[-1].split('.')[0].replace('pg',''))
        print("BOOK ID", book_id)
        
        # Import file as lines
        lines = open(epub_file, 'r', encoding='utf-8-sig').readlines()
        df = pd.DataFrame(lines, columns=['line_str'])
        df.index.name = 'line_num'
        df.line_str = df.line_str.str.strip()
        df['book_id'] = book_id
        
        # FIX CHARACTERS TO IMPROVE TOKENIZATION
        df.line_str = df.line_str.str.replace('—', ' — ')
        df.line_str = df.line_str.str.replace('-', ' - ')
        
        # Get book title and put into LIB table -- note problems, though
        
        book_title = re.sub(r"The Project Gutenberg eBook( of|,) ", "", df.loc[0].line_str, flags=re.IGNORECASE)
        #book_title = re.sub(r"The Project Gutenberg eBook( of|,) ", "", df.loc[1].line_str, flags=re.IGNORECASE)
        book_title = re.sub(r"Project Gutenberg's ", "", book_title, flags=re.IGNORECASE)
        
        
        # Remove cruft
        a = chap_pats[book_id]['start_line'] - 1
        b = chap_pats[book_id]['end_line'] + 1
        df = df.iloc[a:b]
        
        # Chunk by chapter
        chap_lines = df.line_str.str.match(chap_pats[book_id]['chapter'])
        chap_nums = [i+1 for i in range(df.loc[chap_lines].shape[0])]
        df.loc[chap_lines, 'chap_num'] = chap_nums
        df.chap_num = df.chap_num.ffill()

        # Clean up
        df = df[~df.chap_num.isna()] # Remove chapter heading lines
        df = df.loc[~chap_lines] # Remove everything before Chapter 1
        df['chap_num'] = df['chap_num'].astype('int')
        
        # Group -- Note that we exclude the book level in the OHCO at this point
        df = df.groupby(OHCO[1:2]).line_str.apply(lambda x: '\n'.join(x)).to_frame() # Make big string
        
        # Split into paragrpahs
        df = df['line_str'].str.split(r'\n\n+', expand=True).stack().to_frame().rename(columns={0:'para_str'})
        df.index.names = OHCO[1:3] # MAY NOT BE NECESSARY UNTIL THE END
        df['para_str'] = df['para_str'].str.replace(r'\n', ' ').str.strip()
        df = df[~df['para_str'].str.match(r'^\s*$')] # Remove empty paragraphs
        
        # Set index
        df['book_id'] = book_id
        df = df.reset_index().set_index(OHCO[:3])

        # Register
        my_lib.append((book_id, book_title, epub_file))
        my_doc.append(df)

    docs = pd.concat(my_doc)
    library = pd.DataFrame(my_lib, columns=['book_id', 'book_title', 'book_file']).set_index('book_id')
    print("Done.")
    return library, docs
In [5]:
epubs = [epub for epub in sorted(glob(epub_dir+'/*.txt'))]
LIB, DOC = acquire_epubs(epubs, chap_pats)

LIB_arti = pd.read_csv('LIB_arti.csv')
DOC_arti = pd.read_csv('DOC_arti.csv')

DOC_arti = DOC_arti.rename(columns = {'story_num':'chap_num'})
DOC_arti = DOC_arti.set_index(['book_id','chap_num','para_num'])

LIB['book_id'] = LIB.index

LIB = LIB.append(LIB_arti)
DOC = DOC.append(DOC_arti)

LIB.index = LIB['book_id']
BOOK ID 2787
BOOK ID 2726
BOOK ID 2786
BOOK ID 3499
BOOK ID 2788
BOOK ID 514
BOOK ID 2804
BOOK ID 3795
Done.
In [6]:
LIB
Out[6]:
book_title book_file book_id
book_id
2787 An Old - fashioned Girl, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_AN_OLD-FASHIONED_GIRL... 2787
2726 Eight Cousins, by Louisa M. Alcott epubs_\ALCOTT_MAY_LOUISA_EIGHT_COUSINS-pg2726.txt 2726
2786 Jack and Jill, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_JACK_AND_JILL-pg2786.txt 2786
3499 Jo's Boys, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_JO’S_BOYS-pg3499.txt 3499
2788 Little Men, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_LITTLE_MEN-pg2788.txt 2788
514 Little Women, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_LITTLE_WOMEN-pg514.txt 514
2804 Rose in Bloom, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_ROSE_IN_BLOOM-pg2804.txt 2804
3795 Under the Lilacs, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_UNDER_THE_LILACS-pg37... 3795
1 The Works of Edgar Allan Poe Volume 1 (of 5) o... epubs/2147-0.txt 1
2 The Works of Edgar Allan Poe Volume 2 (of 5) o... epubs/2148-0.txt 2
3 The Works of Edgar Allan Poe Volume 3 (of 5) o... epubs/2149-0.txt 3
4 The Works of Edgar Allan Poe Volume 4 (of 5) o... epubs/2150-0.txt 4
5 The Works of Edgar Allan Poe Volume 5 (of 5) o... epubs/2151-0.txt 5
In [7]:
DOC.sample(10)
Out[7]:
para_str
book_id chap_num para_num
514 9 89 "Please forgive my rudeness, and come and danc...
14 112 "The Rival Painters."
23 42 "Bless me, what's all this?" cried the old lad...
3499 40 27 'Don't think of it, dear. Tell about the happy...
2788 29 22 “Now, Daisy!” called the high priest of Kitty ...
2786 10 61 “Who is it?” asked Jill, languidly, opening he...
2726 8 1 “I think I could, and I won't charge any inter...
2804 23 82 “Dear me, how you two have grown! You big thin...
2786 20 3 “Can't; we are off to Pebbly Beach the second ...
514 16 1 In the cold gray dawn the sisters lit their la...

Tokenize and Annotate

We use NLTK this time. Note that this process takes some time, mainly because the NLTK functions are not optimized for dataframes.

Note that we can choose between tokenizers. NLTK offers a variety of them. Here is a list.

In [8]:
def tokenize(doc_df, OHCO=OHCO, remove_pos_tuple=False, ws=False):
    
    # Paragraphs to Sentences
    df = doc_df.para_str\
        .apply(lambda x: pd.Series(nltk.sent_tokenize(x)))\
        .stack()\
        .to_frame()\
        .rename(columns={0:'sent_str'})
    
    # Sentences to Tokens
    # Local function to pick tokenizer
    def word_tokenize(x):
        if ws:
            s = pd.Series(nltk.pos_tag(nltk.WhitespaceTokenizer().tokenize(x)))
        else:
            s = pd.Series(nltk.pos_tag(nltk.word_tokenize(x))) # Discards stuff in between
        return s
            
    df = df.sent_str\
        .apply(word_tokenize)\
        .stack()\
        .to_frame()\
        .rename(columns={0:'pos_tuple'})
    
    # Grab info from tuple
    df['pos'] = df.pos_tuple.apply(lambda x: x[1])
    df['token_str'] = df.pos_tuple.apply(lambda x: x[0])
    if remove_pos_tuple:
        df = df.drop('pos_tuple', 1)
    
    # Add index
    df.index.names = OHCO
    
    return df
In [9]:
%%time
TOKEN = tokenize(DOC, ws=False)
Wall time: 1min 27s
In [10]:
TOKEN.head()
Out[10]:
pos_tuple pos token_str
book_id chap_num para_num sent_num token_num
2787 1 1 0 0 (“, IN) IN
1 (IT, NNP) NNP IT
2 ('S, POS) POS 'S
3 (time, NN) NN time
4 (to, TO) TO to
In [11]:
TOKEN[TOKEN.pos.str.match('^NNP')]
Out[11]:
pos_tuple pos token_str
book_id chap_num para_num sent_num token_num
2787 1 1 0 1 (IT, NNP) NNP IT
10 (Tom, NNP) NNP Tom
2 0 1 (Come, NNP) NNP Come
3 0 1 (Oh, NNP) NNP Oh
1 23 (Polly, NNP) NNP Polly
... ... ... ... ... ... ... ...
5 10 107 0 9 (’, NNP) NNP
1 4 (A.M, NNP) NNP A.M
7 10 (President, NNP) NNP President
8 21 (Ponnonner, NNP) NNP Ponnonner
22 (’, NNP) NNP

54314 rows × 3 columns

Reduce

Extract a vocabulary from the TOKEN table

In [12]:
TOKEN['term_str'] = TOKEN['token_str'].str.lower().str.replace('[\W_]', '')
In [13]:
VOCAB = TOKEN.term_str.value_counts().to_frame()\
    .rename(columns={'index':'term_str', 'term_str':'n'})\
    .sort_index().reset_index().rename(columns={'index':'term_str'})
VOCAB.index.name = 'term_id'
In [14]:
VOCAB['num'] = VOCAB.term_str.str.match("\d+").astype('int')
In [15]:
VOCAB
Out[15]:
term_str n num
term_id
0 206017 0
1 0 1 1
2 00 1 1
3 00000258 1 1
4 0000157 1 1
... ... ... ...
29554 échapper 1 0
29555 élite 1 0
29556 émeutes 1 0
29557 été 1 0
29558 être 3 0

29559 rows × 3 columns

Annotate (VOCAB)

Add Stopwords

We use NLTK's built in stopword list for English. Note that we can add and subtract from this list, or just create our own list and keep it in our data model.

In [16]:
sw = pd.DataFrame(nltk.corpus.stopwords.words('english'), columns=['term_str'])
sw = sw.reset_index().set_index('term_str')
sw.columns = ['dummy']
sw.dummy = 1
In [17]:
sw.sample(10)
Out[17]:
dummy
term_str
between 1
didn't 1
about 1
few 1
them 1
during 1
you've 1
couldn't 1
who 1
needn't 1
In [18]:
VOCAB['stop'] = VOCAB.term_str.map(sw.dummy)
VOCAB['stop'] = VOCAB['stop'].fillna(0).astype('int')
In [19]:
VOCAB[VOCAB.stop == 1].sample(10)
Out[19]:
term_str n num stop
term_id
13335 in 16973 0 1
17551 not 5774 0 1
18119 ours 41 0 1
12536 him 4593 0 1
1738 at 6738 0 1
29483 yourself 175 0 1
28976 whom 264 0 1
12198 having 618 0 1
26185 there 2611 0 1
11936 hadn 1 0 1

Add Stems

In [20]:
from nltk.stem.porter import PorterStemmer
stemmer1 = PorterStemmer()
VOCAB['stem_porter'] = VOCAB.term_str.apply(stemmer1.stem)

#from nltk.stem.snowball import SnowballStemmer
#stemmer2 = SnowballStemmer("english")
#VOCAB['stem_snowball'] = VOCAB.term_str.apply(stemmer2.stem)

#from nltk.stem.lancaster import LancasterStemmer
#stemmer3 = LancasterStemmer()
#VOCAB['stem_lancaster'] = VOCAB.term_str.apply(stemmer3.stem)
In [21]:
VOCAB.sample(10)
VOCAB.index.name = 'term_id'
In [22]:
#VOCAB[VOCAB.stem_porter != VOCAB.stem_lancaster]
In [23]:
#VOCAB['pos_max'] = TOKEN.pos.apply(lambda x: len(x.unique()))
#VOCAB['pos_max'] = TOKEN.pos.apply(lambda x: str.count())
#VOCAB['pos_max']  = TOKEN.pos.value_counts()
#VOCAB['pos_max']  = TOKEN.groupby('pos').size()
#VOCAB['pos_max']  = TOKEN.groupby('pos').mode()

MFCB = TOKEN.groupby(['term_str','pos']).pos.count().unstack().idxmax(1).to_frame()
VOCAB = pd.merge(VOCAB,MFCB, left_on='term_str', right_on = 'term_str')
VOCAB = VOCAB.rename(columns = {0:'pos_max'})

Save

In [24]:
VOCAB.sample(5)
VOCAB.index.name = 'term_id'
In [25]:
DOC.to_csv('DOC.csv')
LIB.to_csv('LIBRARY.csv')
VOCAB.to_csv('VOCAB.csv')
TOKEN.to_csv('TOKEN.csv')

TFIDF

In [26]:
count_method = 'n' # 'c' or 'n' # n = n tokens, c = distinct token (term) count
tf_method = 'sum' # sum, max, log, double_norm, raw, binary
tf_norm_k = .5 # only used for double_norm
idf_method = 'standard' # standard, max, smooth
gradient_cmap = 'YlGnBu' # YlGn, GnBu, YlGnBu; For tables; see https://matplotlib.org/3.1.0/tutorials/colors/colormaps.html 
In [27]:
OHCO = ['book_id', 'chap_num', 'para_num', 'sent_num', 'token_num']
SENTS = OHCO[:4]
PARAS = OHCO[:3]
CHAPS = OHCO[:2]
BOOKS = OHCO[:1]
In [28]:
bag = CHAPS
In [29]:
pd.__version__
Out[29]:
'1.0.0'
In [30]:
sns.set()
%matplotlib inline

Prepare the data

Import tables

Bring the the tables we created last time.

In [31]:
%%time
LIB = pd.read_csv("LIBRARY.csv").set_index(BOOKS)
TOKEN = pd.read_csv('TOKEN.csv').set_index(OHCO)
VOCAB = pd.read_csv('VOCAB.csv').set_index('term_id')
# DOC = pd.read_csv(data_dir + "DOC.csv").set_index(PARAS)
Wall time: 1.15 s
In [32]:
LIB = LIB[['book_title', 'book_file']]
LIB
Out[32]:
book_title book_file
book_id
2787 An Old - fashioned Girl, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_AN_OLD-FASHIONED_GIRL...
2726 Eight Cousins, by Louisa M. Alcott epubs_\ALCOTT_MAY_LOUISA_EIGHT_COUSINS-pg2726.txt
2786 Jack and Jill, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_JACK_AND_JILL-pg2786.txt
3499 Jo's Boys, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_JO’S_BOYS-pg3499.txt
2788 Little Men, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_LITTLE_MEN-pg2788.txt
514 Little Women, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_LITTLE_WOMEN-pg514.txt
2804 Rose in Bloom, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_ROSE_IN_BLOOM-pg2804.txt
3795 Under the Lilacs, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_UNDER_THE_LILACS-pg37...
1 The Works of Edgar Allan Poe Volume 1 (of 5) o... epubs/2147-0.txt
2 The Works of Edgar Allan Poe Volume 2 (of 5) o... epubs/2148-0.txt
3 The Works of Edgar Allan Poe Volume 3 (of 5) o... epubs/2149-0.txt
4 The Works of Edgar Allan Poe Volume 4 (of 5) o... epubs/2150-0.txt
5 The Works of Edgar Allan Poe Volume 5 (of 5) o... epubs/2151-0.txt
In [33]:
VOCAB.head()
Out[33]:
term_str n num stop stem_porter pos_max
term_id
0 NaN 206017 0 0 NaN ,
1 0 1 1 0 0 CD
2 00 1 1 0 00 CD
3 00000258 1 1 0 00000258 CD
4 0000157 1 1 0 0000157 CD
In [34]:
VOCAB = VOCAB[~VOCAB.term_str.isna()]
In [35]:
VOCAB.sample(5)
Out[35]:
term_str n num stop stem_porter pos_max
term_id
18356 pail 28 0 0 pail NN
523 adept 4 0 0 adept NN
8081 dredging 1 0 0 dredg VBG
25802 taking 239 0 0 take VBG
26277 thought 1043 0 0 thought VBD
In [36]:
TOKEN.head()
Out[36]:
pos_tuple pos token_str term_str
book_id chap_num para_num sent_num token_num
2787 1 1 0 0 ('“', 'IN') IN NaN
1 ('IT', 'NNP') NNP IT it
2 ("'S", 'POS') POS 'S s
3 ('time', 'NN') NN time time
4 ('to', 'TO') TO to to
In [37]:
TOKEN = TOKEN[~TOKEN.term_str.isna()]
In [38]:
TOKEN.head()
Out[38]:
pos_tuple pos token_str term_str
book_id chap_num para_num sent_num token_num
2787 1 1 0 1 ('IT', 'NNP') NNP IT it
2 ("'S", 'POS') POS 'S s
3 ('time', 'NN') NN time time
4 ('to', 'TO') TO to to
5 ('go', 'VB') VB go go
In [39]:
# DOC.head()

Add term_id to TOKEN table

We need to do this to combine the VOCAB and TOKEN tables more efficiently. Note, we could have done this in the previous lab.

We use .map() because TOKEN and VOCAB do not share an index at this time.

VOCAB.reset_index().set_index('term_str').term_id term_str 0 1 1 2 10 3 100 4 1000 5 10000 6 1000000 7 10000000 8 10440 9 10800 10
In [40]:
TOKEN['term_id'] = TOKEN.term_str.map(VOCAB.reset_index().set_index('term_str').term_id)
In [41]:
TOKEN.head()
Out[41]:
pos_tuple pos token_str term_str term_id
book_id chap_num para_num sent_num token_num
2787 1 1 0 1 ('IT', 'NNP') NNP IT it 14145
2 ("'S", 'POS') POS 'S s 22463
3 ('time', 'NN') NN time time 26439
4 ('to', 'TO') TO to to 26510
5 ('go', 'VB') VB go go 11429

Add Max POS to VOCAB

Just in case it's not there. It's easy now that we have a share feature -- term_id -- between VOCAB and TOKEN.

Regarding collisions when using .idxmax(), the documentation says "If multiple values equal the maximum, the first row label with that value is returned."

In [42]:
# Demo
# TOKEN.groupby(['term_id', 'pos']).pos.count()
# TOKEN.groupby(['term_id', 'pos']).pos.count().unstack()
# TOKEN.groupby(['term_id', 'pos']).pos.count().unstack().idxmax(1)
In [43]:
VOCAB['pos_max'] = TOKEN.groupby(['term_id', 'pos']).pos.count().unstack().idxmax(1)
In [44]:
VOCAB.sample(5)
Out[44]:
term_str n num stop stem_porter pos_max
term_id
25412 sunbeams 2 0 0 sunbeam JJ
13384 incitamentum 1 0 0 incitamentum NN
20962 rakes 2 0 0 rake NNS
7153 detect 14 0 0 detect VB
10633 fountainhead 1 0 0 fountainhead NN

Compare POS Stats in TOKEN and VOCAB

Pause and look at distribution of POS tags. The POS table could become part of your data model (analytical edition) if you were interested in studying POS tags.

In [45]:
POS = TOKEN.pos.value_counts().to_frame().rename(columns={'pos':'n'})
POS.index.name = 'pos_id'
In [46]:
POS.sort_values('n').plot.bar(y='n', figsize=(15,5), rot=45);

Zipf's Law

$f \propto \frac{1}{r} $

$k = fr$

Add Term Rank to VOCAB

In [47]:
if 'term_rank' not in VOCAB.columns:
    VOCAB = VOCAB.sort_values('n', ascending=False).reset_index()
    VOCAB.index.name = 'term_rank'
    VOCAB = VOCAB.reset_index()
    VOCAB = VOCAB.set_index('term_id')
    VOCAB['term_rank'] = VOCAB['term_rank'] + 1
In [48]:
VOCAB.head()
Out[48]:
term_rank term_str n num stop stem_porter pos_max
term_id
26151 1 the 58042 0 1 the DT
1110 2 and 41247 0 1 and CC
17817 3 of 29442 0 1 of IN
26510 4 to 28593 0 1 to TO
219 5 a 26310 0 1 a DT

Alternate Rank

The term_rank as defined above assigns different ranks to words with the same frequency, which occurs in the long tail, e.g. with words that appear once. This measure groups words by term count.

In [49]:
new_rank = VOCAB.n.value_counts()\
    .sort_index(ascending=False).reset_index().reset_index()\
    .rename(columns={'level_0':'term_rank2', 'index':'n', 'n':'nn'})\
    .set_index('n')
In [50]:
new_rank.head()
Out[50]:
term_rank2 nn
n
58042 0 1
41247 1 1
29442 2 1
28593 3 1
26310 4 1
In [51]:
VOCAB['term_rank2'] = VOCAB.n.map(new_rank.term_rank2) + 1
In [52]:
VOCAB.head()
Out[52]:
term_rank term_str n num stop stem_porter pos_max term_rank2
term_id
26151 1 the 58042 0 1 the DT 1
1110 2 and 41247 0 1 and CC 2
17817 3 of 29442 0 1 of IN 3
26510 4 to 28593 0 1 to TO 4
219 5 a 26310 0 1 a DT 5
In [53]:
VOCAB['p'] = VOCAB.n / VOCAB.shape[0]

Compute Zipf's K

In [54]:
VOCAB['zipf_k'] = VOCAB.n * VOCAB.term_rank
VOCAB['zipf_k2'] = VOCAB.n * VOCAB.term_rank2
VOCAB['zipf_k3'] = VOCAB.p * VOCAB.term_rank2
In [55]:
VOCAB.describe().T
Out[55]:
count mean std min 25% 50% 75% max
term_rank 29556.0 14778.500000 8532.226614 1.000000 7389.750000 14778.500000 22167.250000 29556.000000
n 29556.0 37.567431 598.419437 1.000000 1.000000 3.000000 9.000000 58042.000000
num 29556.0 0.007376 0.085567 0.000000 0.000000 0.000000 0.000000 1.000000
stop 29556.0 0.005075 0.071060 0.000000 0.000000 0.000000 0.000000 1.000000
term_rank2 29556.0 589.034342 55.889659 1.000000 597.000000 603.000000 605.000000 605.000000
p 29556.0 0.001271 0.020247 0.000034 0.000034 0.000102 0.000305 1.963798
zipf_k 29556.0 48129.905535 26743.165360 19026.000000 26414.750000 38806.500000 64512.000000 160560.000000
zipf_k2 29556.0 7685.649411 18765.528221 605.000000 605.000000 1809.000000 5373.000000 160560.000000
zipf_k3 29556.0 0.260037 0.634914 0.020470 0.020470 0.061206 0.181790 5.432400

Words with low k

In [56]:
VOCAB[VOCAB.zipf_k <= VOCAB.zipf_k.quantile(.1)].sort_values('zipf_k3', ascending=True).head()
Out[56]:
term_rank term_str n num stop stem_porter pos_max term_rank2 p zipf_k zipf_k2 zipf_k3
term_id
26076 19026 terminate 1 0 0 termin VB 605 0.000034 19026 605 0.02047
1426 20991 archangel 1 0 0 archangel NNP 605 0.000034 20991 605 0.02047
28447 20992 wadding 1 0 0 wad NN 605 0.000034 20992 605 0.02047
1428 20993 archbishop 1 0 0 archbishop NN 605 0.000034 20993 605 0.02047
1443 20994 architects 1 0 0 architect NNS 605 0.000034 20994 605 0.02047

Words with high k

In [57]:
VOCAB[VOCAB.zipf_k >= VOCAB.zipf_k.quantile(.9)].sort_values('zipf_k3', ascending=False).head()
Out[57]:
term_rank term_str n num stop stem_porter pos_max term_rank2 p zipf_k zipf_k2 zipf_k3
term_id
12226 16 he 10035 0 1 he PRP 16 0.339525 160560 160560 5.432400
16095 39 me 4107 0 1 me PRP 39 0.138957 160173 160173 5.419306
3719 35 by 4566 0 1 by IN 35 0.154486 159810 159810 5.407024
17995 48 or 3322 0 1 or CC 48 0.112397 159456 159456 5.395047
28820 47 were 3383 0 1 were VBD 47 0.114461 159001 159001 5.379652

Visualize

Histogram of Zipf K

In [58]:
#px.histogram(VOCAB, 'zipf_k', marginal='box')
In [59]:
# px.histogram(VOCAB, 'zipf_k2', marginal='box')
In [60]:
# px.histogram(VOCAB, 'zipf_k3', marginal='box')

Rank and N

In [61]:
VSAMP1 = VOCAB[['n','term_rank','zipf_k','term_str','pos_max']]
# VSAMP2 = VOCAB[['n','term_rank2','zipf_k3']].drop_duplicates()
In [62]:
#px.scatter(VSAMP1, x='term_rank', y='n', log_y=False, log_x=False, hover_name='term_str', color='pos_max')
In [63]:
# px.scatter(VSAMP2, x='term_rank2', y='n', log_y=False, log_x=False)
In [64]:
#px.scatter(VSAMP1, x='term_rank', y='n', log_y=True, log_x=True, hover_name='term_str', color='pos_max')
In [65]:
# px.scatter(VSAMP2, x='term_rank2', y='n', log_y=True, log_x=True)

Demo Rank Index

In [66]:
rank_index = [1, 2, 3, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000]
In [67]:
demo = VOCAB.loc[VOCAB.term_rank.isin(rank_index), ['term_str', 'term_rank', 'n', 'zipf_k', 'pos_max']]
In [68]:
demo.style.background_gradient(cmap=gradient_cmap, high=.5)
Out[68]:
term_str term_rank n zipf_k pos_max
term_id
26151 the 1 58042 58042 DT
1110 and 2 41247 82494 CC
17817 of 3 29442 88326 IN
26145 that 10 11925 119250 IN
11934 had 20 6748 134960 VBD
7807 do 30 5242 157260 VBP
22518 said 40 3858 154320 VBD
1075 an 50 3066 153300 DT
5899 could 60 2611 156660 MD
16799 more 70 2134 149380 RBR
18189 over 80 1743 139440 IN
17340 never 90 1561 140490 RB
22721 say 100 1366 136600 VB
15478 love 200 722 144400 NN
27113 turned 300 413 123900 VBD
16110 means 400 296 118400 NNS
2674 betty 500 223 111500 NNP
15394 longer 600 182 109200 RBR
9246 everyone 700 153 107100 NN
24275 somewhat 800 133 106400 RB
23785 sisters 900 118 106200 NNS
14389 joy 1000 107 107000 NN
22113 rising 2000 48 96000 VBG
2293 bay 3000 30 90000 NN
13987 introduction 4000 21 84000 NN
26264 thorn 5000 15 75000 NN
23013 secondly 6000 12 72000 RB
17178 national 7000 9 63000 JJ
26766 transaction 8000 8 64000 NN
In [69]:
# rank_index = [1, 2, 3, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 200, 300, 400, 500, 600, 700, 800]
# demo = VOCAB.loc[VOCAB.term_rank2.isin(rank_index), ['term_str', 'term_rank2', 'n', 'zipf_k2', 'pos_max']]
# demo.style.background_gradient(cmap=gradient_cmap, high=.5)

VOCAB Entropy

Compute P of VOCAB

This is the prior, or marginal, probability of a term.

In [70]:
%%time
VOCAB['p2'] = VOCAB.n / VOCAB.n.sum()
Wall time: 1.99 ms

Compute Entropy of VOCAB

In [71]:
VOCAB['h'] = VOCAB.p2 * np.log2(1/VOCAB.p2) # Self entropy of each word 
H = VOCAB.h.sum()
N_v = VOCAB.shape[0]
H_max = np.log2(N_v)
R = round(1 - (H/H_max), 2) * 100
In [72]:
print("H \t= {}\nH_max \t= {}\nR \t= {}%".format(H, H_max, int(R)))
H 	= 9.829010452294979
H_max 	= 14.851163413220117
R 	= 34%

BOW

In [73]:
BOW = TOKEN.groupby(bag+['term_id']).term_id.count()\
    .to_frame().rename(columns={'term_id':'n'})
In [74]:
BOW['c'] = BOW.n.astype('bool').astype('int')
In [75]:
BOW.head(10)
BOW.to_csv('BOW.csv')

Document-Term Matrix

We create a document-term count matrix. Note that we can create a matrix for any of the features in BOW. Also, see how the OHCO helps us distinguish between features and observation identity.

Note, these operations are slower than using groupby().

Create Count Matrix

In [76]:
%%time
DTCM = BOW[count_method].unstack().fillna(0).astype('int')
Wall time: 155 ms
In [77]:
DTCM.head()
Out[77]:
term_id 1 2 3 4 5 6 7 8 9 10 ... 29549 29550 29551 29552 29553 29554 29555 29556 29557 29558
book_id chap_num
1 1 0 0 0 0 1 0 0 0 0 3 ... 0 0 0 0 0 0 0 0 0 0
2 1 0 0 0 0 0 0 0 0 4 ... 0 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0 2 ... 0 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 0 0 2 ... 0 0 0 0 0 0 0 1 0 0

5 rows × 29556 columns

Compute TF

We could also compute that using BOW.groupby().

In [78]:
%%time
print('TF method:', tf_method)

if tf_method == 'sum':
    TF = DTCM.T / DTCM.T.sum()

elif tf_method == 'max':
    TF = DTCM.T / DTCM.T.max()

elif tf_method == 'log':
    TF = np.log10(1 + DTCM.T)
    
elif tf_method == 'raw':
    TF = DTCM.T

elif tf_method == 'double_norm':
    TF = DTCM.T / DTCM.T.max()
    TF = tf_norm_k + (1 - tf_norm_k) * TF[TF > 0] # EXPLAIN; may defeat purpose of norming

elif tf_method == 'binary':
    TF = DTCM.T.astype('bool').astype('int')
    
TF = TF.T
TF method: sum
Wall time: 162 ms
In [79]:
TF.head()
Out[79]:
term_id 1 2 3 4 5 6 7 8 9 10 ... 29549 29550 29551 29552 29553 29554 29555 29556 29557 29558
book_id chap_num
1 1 0.000000 0.0 0.0 0.0 0.000047 0.0 0.0 0.0 0.0 0.000140 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.0 0.0
2 0.000073 0.0 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.000291 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.0 0.0
3 0.000000 0.0 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.000713 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.0 0.0
4 0.000000 0.0 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.000000 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.0 0.0
5 0.000000 0.0 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.000099 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000049 0.0 0.0

5 rows × 29556 columns

Compute DF

In [80]:
%%time
DF = DTCM[DTCM > 0].count()
Wall time: 283 ms
In [81]:
DF.head()
Out[81]:
term_id
1    1
2    1
3    1
4    1
5    1
dtype: int64

Compute IDF

In [82]:
N = DTCM.shape[0]
In [83]:
print('IDF method:', idf_method)

if idf_method == 'standard':
    IDF = np.log10(N / DF)

elif idf_method == 'max':
    IDF = np.log10(DF.max() / DF) 

elif idf_method == 'smooth':
    IDF = np.log10((1 + N) / (1 + DF)) + 1 # Correct?
IDF method: standard

Compute TFIDF

In [84]:
TFIDF = TF * IDF
In [85]:
TFIDF.head()
Out[85]:
term_id 1 2 3 4 5 6 7 8 9 10 ... 29549 29550 29551 29552 29553 29554 29555 29556 29557 29558
book_id chap_num
1 1 0.000000 0.0 0.0 0.0 0.000112 0.0 0.0 0.0 0.0 0.000126 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.0 0.0
2 0.000174 0.0 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.000263 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.0 0.0
3 0.000000 0.0 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.000644 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.0 0.0
4 0.000000 0.0 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.000000 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.0 0.0
5 0.000000 0.0 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.000089 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000118 0.0 0.0

5 rows × 29556 columns

Move things to their places

In [86]:
VOCAB['df'] = DF
VOCAB['idf'] = IDF
In [87]:
VOCAB.head()
Out[87]:
term_rank term_str n num stop stem_porter pos_max term_rank2 p zipf_k zipf_k2 zipf_k3 p2 h df idf
term_id
26151 1 the 58042 0 1 the DT 1 1.963798 58042 58042 1.963798 0.052274 0.222570 247 0.001755
1110 2 and 41247 0 1 and CC 2 1.395554 82494 82494 2.791108 0.037148 0.176474 247 0.001755
17817 3 of 29442 0 1 of IN 3 0.996143 88326 88326 2.988429 0.026516 0.138865 247 0.001755
26510 4 to 28593 0 1 to TO 4 0.967418 114372 114372 3.869671 0.025752 0.135947 247 0.001755
219 5 a 26310 0 1 a DT 5 0.890175 131550 131550 4.450873 0.023695 0.127937 247 0.001755
In [88]:
%%time
BOW['tf'] = TF.stack()
BOW['tfidf'] = TFIDF.stack()
Wall time: 3.08 s
In [89]:
BOW.head()
Out[89]:
n c tf tfidf
book_id chap_num term_id
1 1 5 1 1 0.000047 0.000112
10 3 1 0.000140 0.000126
13 1 1 0.000047 0.000098
16 1 1 0.000047 0.000112
17 1 1 0.000047 0.000112

Apply TFIDF sum to VOCAB

In [90]:
VOCAB['tfidf_sum'] = TFIDF.sum()

Observe results

In [91]:
VOCAB.sort_values('tfidf_sum', ascending=False).head(20).style.background_gradient(cmap=gradient_cmap, high=1)
Out[91]:
term_rank term_str n num stop stem_porter pos_max term_rank2 p zipf_k zipf_k2 zipf_k3 p2 h df idf tfidf_sum
term_id
19680 105 polly 1325 0 0 polli NNP 104 0.044830 139125 137800 4.662336 0.001193 0.011588 26 0.979478 0.246200
14306 93 jo 1421 0 0 jo NNP 92 0.048078 132153 130732 4.423197 0.001280 0.012299 71 0.543193 0.235562
17608 23 nt 6003 0 0 nt RB 23 0.203106 138069 138069 4.671437 0.005406 0.040716 183 0.132001 0.182482
2541 193 ben 741 0 0 ben NNP 185 0.025071 143013 137085 4.638145 0.000667 0.007040 31 0.903090 0.181365
22269 87 rose 1573 0 0 rose NNP 86 0.053221 136851 135278 4.577006 0.001417 0.013406 118 0.322570 0.132242
6492 209 dan 675 0 0 dan NNP 199 0.022838 141075 134325 4.544762 0.000608 0.006495 29 0.932054 0.120144
14290 276 jill 454 0 0 jill NNP 256 0.015361 125304 116224 3.932332 0.000409 0.004602 23 1.032724 0.116546
26554 170 tom 819 0 0 tom NNP 165 0.027710 139230 135135 4.572168 0.000738 0.007675 44 0.750999 0.114378
16174 215 meg 652 0 0 meg NNP 204 0.022060 140180 133008 4.500203 0.000587 0.006303 46 0.731694 0.114222
19570 1287 plumfield 81 0 0 plumfield NNP 525 0.002741 104247 42525 1.438794 0.000073 0.001003 34 0.862973 0.111802
15644 254 mac 525 0 0 mac NNP 236 0.017763 133350 123900 4.192042 0.000473 0.005223 35 0.850384 0.105848
1995 355 bab 336 0 0 bab NNP 309 0.011368 119280 103824 3.512789 0.000303 0.003538 24 1.014240 0.100857
19156 302 phebe 400 0 0 phebe NNP 277 0.013534 120800 110800 3.748816 0.000360 0.004121 35 0.850384 0.087911
23386 18 she 7972 0 1 she PRP 18 0.269725 143496 143496 4.855055 0.007180 0.051133 223 0.046147 0.085959
14178 233 jack 600 0 0 jack NNP 218 0.020300 139800 130800 4.425497 0.000540 0.005865 65 0.581538 0.085879
14888 264 laurie 487 0 0 lauri NNP 245 0.016477 128568 119315 4.036913 0.000439 0.004893 47 0.722354 0.085124
2651 375 beth 319 0 0 beth NNP 320 0.010793 119625 102080 3.453783 0.000287 0.003380 29 0.932054 0.083636
1073 313 amy 378 0 0 ami NNP 286 0.012789 118314 108108 3.657734 0.000340 0.003922 43 0.760983 0.078558
17172 270 nat 469 0 0 nat NNP 250 0.015868 126630 117250 3.967046 0.000422 0.004735 34 0.862973 0.078477
12419 13 her 10838 0 1 her PRP$ 13 0.366694 140894 140894 4.767019 0.009761 0.065191 231 0.030840 0.078328
In [92]:
VOCAB[['term_rank','term_str','pos_max','tfidf_sum']]\
    .sort_values('tfidf_sum', ascending=False).head(50)\
    .style.background_gradient(cmap=gradient_cmap, high=1)
Out[92]:
term_rank term_str pos_max tfidf_sum
term_id
19680 105 polly NNP 0.246200
14306 93 jo NNP 0.235562
17608 23 nt RB 0.182482
2541 193 ben NNP 0.181365
22269 87 rose NNP 0.132242
6492 209 dan NNP 0.120144
14290 276 jill NNP 0.116546
26554 170 tom NNP 0.114378
16174 215 meg NNP 0.114222
19570 1287 plumfield NNP 0.111802
15644 254 mac NNP 0.105848
1995 355 bab NNP 0.100857
19156 302 phebe NNP 0.087911
23386 18 she PRP 0.085959
14178 233 jack NNP 0.085879
14888 264 laurie NNP 0.085124
2651 375 beth NNP 0.083636
1073 313 amy NNP 0.078558
17172 270 nat NNP 0.078477
12419 13 her PRP$ 0.078328
4183 453 celia NNP 0.076972
11887 348 gutenberg NNP 0.076073
4341 327 charlie NNP 0.074761
853 352 alec NNP 0.074319
29473 14 you PRP 0.073084
27349 238 uncle NNP 0.072490
2697 278 bhaer NNP 0.071502
9698 345 fanny NNP 0.071402
6910 301 demi NNP 0.069364
3233 138 boys NNS 0.068645
2674 500 betty NNP 0.065724
26509 492 tm NN 0.065367
26267 452 thorny NNP 0.063242
1853 239 aunt NNP 0.062122
20302 326 project NNP 0.060939
10681 324 frank NNP 0.059915
16940 98 mrs NNP 0.058152
16939 109 mr NNP 0.054054
15907 404 march NNP 0.051623
16228 319 men NNS 0.051253
16557 211 miss NNP 0.051239
6459 376 daisy NNP 0.050756
1436 562 archie NNP 0.050355
16065 484 maud NNP 0.049416
9683 392 fan NNP 0.048854
16674 580 molly NNP 0.046896
15306 85 ll MD 0.043059
8005 414 dr NNP 0.042937
11299 158 girls NNS 0.042432
26568 464 tommy NNP 0.042167
In [93]:
VOCAB.loc[VOCAB.pos_max != 'NNP', ['term_rank','term_str','pos_max','tfidf_sum']]\
    .sort_values('tfidf_sum', ascending=False)\
    .head(50).style.background_gradient(cmap=gradient_cmap, high=1)
Out[93]:
term_rank term_str pos_max tfidf_sum
term_id
17608 23 nt RB 0.182482
23386 18 she PRP 0.085959
12419 13 her PRP$ 0.078328
29473 14 you PRP 0.073084
3233 138 boys NNS 0.068645
26509 492 tm NN 0.065367
16228 319 men NNS 0.051253
15306 85 ll MD 0.043059
11299 158 girls NNS 0.042432
26378 293 thus RB 0.039245
25754 552 t NN 0.038247
15637 92 m VBP 0.037983
9764 249 father NN 0.036329
3230 167 boy NN 0.035364
15029 382 length NN 0.034837
16868 144 mother NN 0.031910
15127 214 life NN 0.031462
8582 989 electronic JJ 0.030963
29158 349 within IN 0.030297
7838 524 dog NN 0.030160
1614 128 asked VBD 0.029380
4499 296 children NNS 0.029338
28066 207 ve VBP 0.029091
11296 210 girl NN 0.028885
984 394 although IN 0.028362
14574 737 king NN 0.028316
7815 565 doctor NN 0.027413
23774 409 sir NN 0.027299
3737 230 ca MD 0.026520
22813 401 school NN 0.026443
3088 362 book NN 0.026257
29175 204 wo MD 0.026218
1208 169 answered VBD 0.026033
16696 360 money NN 0.025388
8653 541 em PRP 0.025359
26426 147 till NN 0.025151
17032 469 music NN 0.024934
6111 206 cried VBD 0.024694
15061 516 letter NN 0.024538
28547 189 want VBP 0.024504
26284 727 thousand CD 0.024266
3002 785 boat NN 0.024150
29262 566 works NNS 0.024082
11429 91 go VB 0.024064
6428 143 d MD 0.023987
21106 273 read VB 0.023937
11529 148 got VBD 0.023506
15478 200 love NN 0.023434
29249 162 work NN 0.023309
13451 331 indeed RB 0.023147
In [94]:
BOW = BOW.join(VOCAB[['term_str','pos_max']], on='term_id')
In [95]:
BOW.sort_values('tfidf', ascending=False).head(20)\
    .style.background_gradient(cmap=gradient_cmap, high=1)
Out[95]:
n c tf tfidf term_str pos_max
book_id chap_num term_id
2788 21 19570 1 1 0.111111 0.095886 plumfield NNP
14306 1 1 0.111111 0.060355 jo NNP
16228 1 1 0.111111 0.029705 men NNS
3 2 16802 23 1 0.010778 0.025807 morella NNP
2804 44 11887 84 1 0.016932 0.022910 gutenberg NNP
2788 21 3233 1 1 0.111111 0.022061 boys NNS
2804 44 26509 57 1 0.011490 0.020594 tm NN
5 9 3058 48 1 0.008158 0.019533 bonbon NNP
4 10 7279 32 1 0.007891 0.018896 diddler NN
2 6 27979 28 1 0.007867 0.018838 valdemar NNP
2726 24 11887 84 1 0.013503 0.018270 gutenberg NNP
5 4 12717 27 1 0.007452 0.017844 hopfrog NNP
8 18669 19 1 0.007422 0.017771 pathrick NNP
2787 12 19680 100 1 0.018119 0.017747 polly NNP
2804 44 20302 84 1 0.016932 0.017173 project NNP
3795 24 11887 84 1 0.012162 0.016455 gutenberg NNP
2726 24 26509 57 1 0.009163 0.016423 tm NN
2 5 27958 34 1 0.009081 0.016277 v NN
2787 7 19680 85 1 0.016393 0.016057 polly NNP
5 19680 83 1 0.016271 0.015937 polly NNP

Visualize

Rank and TFIDF Sum

In [96]:
#px.scatter(VOCAB, x='term_rank', y='tfidf_sum', hover_name='term_str', hover_data=['n'], color='pos_max')
In [97]:
# px.scatter(VOCAB, x='term_rank2', y='tfidf_sum', hover_name='term_str', hover_data=['n'], color='pos_max')

Log Rank and Log TFIDF Sum

In [98]:
#px.scatter(VOCAB, x='term_rank', y='tfidf_sum', hover_name='term_str', hover_data=['n'], color='pos_max', log_x=True, log_y=True)
In [99]:
# px.scatter(VOCAB, x='term_rank2', y='tfidf_sum', hover_name='term_str', hover_data=['n'], color='pos_max', 
#            log_x=True, log_y=True)

Show Demo Table with TFIDF

In [100]:
demo2 = VOCAB.loc[VOCAB.term_rank.isin(rank_index), ['term_str', 'pos_max', 'term_rank', 'n', 'zipf_k', 'tfidf_sum']]
In [101]:
demo2.style.background_gradient(cmap=gradient_cmap, high=1)
Out[101]:
term_str pos_max term_rank n zipf_k tfidf_sum
term_id
26151 the DT 1 58042 58042 0.022318
1110 and CC 2 41247 82494 0.016572
17817 of IN 3 29442 88326 0.011051
26145 that IN 10 11925 119250 0.004624
11934 had VBD 20 6748 134960 0.005251
7807 do VBP 30 5242 157260 0.014852
22518 said VBD 40 3858 154320 0.010826
1075 an DT 50 3066 153300 0.002332
5899 could MD 60 2611 156660 0.003041
16799 more RBR 70 2134 149380 0.002462
18189 over IN 80 1743 139440 0.005032
17340 never RB 90 1561 140490 0.007722
22721 say VB 100 1366 136600 0.008108
15478 love NN 200 722 144400 0.023434
27113 turned VBD 300 413 123900 0.011823
16110 means NNS 400 296 118400 0.019283
2674 betty NNP 500 223 111500 0.065724
15394 longer RBR 600 182 109200 0.013684
9246 everyone NN 700 153 107100 0.020609
24275 somewhat RB 800 133 106400 0.013636
23785 sisters NNS 900 118 106200 0.017404
14389 joy NN 1000 107 107000 0.013036
22113 rising VBG 2000 48 96000 0.009151
2293 bay NN 3000 30 90000 0.007658
13987 introduction NN 4000 21 84000 0.004541
26264 thorn NN 5000 15 75000 0.003985
23013 secondly RB 6000 12 72000 0.003485
17178 national JJ 7000 9 63000 0.003174
26766 transaction NN 8000 8 64000 0.003490
In [102]:
#px.scatter(demo2, x='term_rank', y='tfidf_sum', log_x=True, log_y=True, text='term_str', color='pos_max', size='n')

Word-Context Matrix Entropy

In [103]:
WCM = DTCM / DTCM.sum()
In [104]:
WCM.sum().head()
Out[104]:
term_id
1    1.0
2    1.0
3    1.0
4    1.0
5    1.0
dtype: float64
In [105]:
WCMh = WCM * np.log2(1/WCM)
In [106]:
VOCAB['h2'] = WCMh.sum()
In [107]:
VOCAB['h2'].hist();

X Factor

In [108]:
# VOCAB['x_factor'] = np.log(VOCAB.term_rank) * VOCAB.h2
In [109]:
# px.scatter(VOCAB, x='term_rank', y='x_factor', hover_name='term_str', color='pos_max', hover_data=['n'])
In [110]:
# VOCAB['x_factor2'] = VOCAB.term_rank2 * VOCAB.h2
In [111]:
VOCAB['x_factor2'] = np.log(VOCAB.term_rank2) * VOCAB.h2
In [112]:
px.scatter(VOCAB, x='term_rank2', y='x_factor2', hover_name='term_str', color='pos_max', hover_data=['n'])
In [113]:
# px.scatter(VOCAB, x='term_rank', y='x_factor', log_x=True, log_y=True, hover_name='term_str', color='pos_max', hover_data=['n'])
In [114]:
# px.scatter(VOCAB, x='term_rank2', y='x_factor2', log_x=True, log_y=True, hover_name='term_str', color='pos_max', hover_data=['n'])

Demo Table

In [115]:
# demo3 = VOCAB.loc[VOCAB.term_rank.isin(rank_index), ['term_str', 'pos_max', 'n', 'term_rank', 'zipf_k', 'tfidf_sum', 'h2', 'x_factor', 'term_rank2', 'x_factor2']]
In [116]:
# demo3.style.background_gradient(cmap=gradient_cmap)
In [117]:
# px.scatter(demo3, x='term_rank', y='x_factor', log_x=True, log_y=True, text='term_str', color='pos_max', size='n')
In [118]:
# px.scatter(demo3, x='term_rank2', y='x_factor2', log_x=False, log_y=False, text='term_str', color='pos_max', size='n')

Reduce VOCAB

Select Significant Terms based on X Factor

We want to take the upper and middle segment of our graph.

In [119]:
# key_col = 'tfidf_sum'
key_col = 'x_factor2'
key_min = VOCAB[key_col].quantile(.9)
rank_min = 200
In [120]:
SIGS = VOCAB.loc[(VOCAB[key_col] >= key_min) & (VOCAB.term_rank >= rank_min)].sort_values(key_col, ascending=False)
In [121]:
SIGS.shape[0]
Out[121]:
2805
In [122]:
SIGS[['pos_max', 'term_str', 'n', 'term_rank', 'zipf_k', 'df', 'idf', 'tfidf_sum','x_factor2']].head(100).style.background_gradient(cmap=gradient_cmap, high=1)
Out[122]:
pos_max term_str n term_rank zipf_k df idf tfidf_sum x_factor2
term_id
14967 VB leave 253 457 115621 147 0.227134 0.013890 41.379320
12364 VBD held 265 436 115540 150 0.218360 0.013137 41.372233
4128 VBD caught 197 569 112093 129 0.283862 0.013035 41.343166
8641 RB else 238 479 114002 140 0.248324 0.013819 41.252227
27111 NN turn 278 419 116482 153 0.209760 0.012972 41.163218
25802 VBG taking 239 477 114003 144 0.236089 0.013082 41.110264
5100 VBG coming 309 384 118656 158 0.195795 0.014899 41.076993
29199 VB wonder 204 555 113220 131 0.277180 0.013006 41.061266
3458 VBD brought 332 358 118856 168 0.169142 0.012631 41.058465
8377 JJ easy 198 567 112266 126 0.294081 0.013113 41.043533
24024 NN smile 245 468 114660 140 0.248324 0.014923 41.030358
26232 VBG thinking 224 499 111776 139 0.251437 0.012957 41.026709
27113 VBD turned 413 300 123900 188 0.120294 0.011823 40.987424
1510 NNS arms 256 451 115456 142 0.242163 0.014446 40.976655
21812 NN rest 336 356 119616 166 0.174344 0.013432 40.903754
24569 VBD spoke 338 351 118638 169 0.166565 0.012842 40.835396
23786 VB sit 180 605 108900 119 0.318905 0.013318 40.824385
15766 VBG making 304 391 118864 159 0.193055 0.013106 40.783993
23791 VBG sitting 202 557 112514 124 0.301030 0.014745 40.778128
26534 RB together 321 370 118770 160 0.190332 0.013484 40.768746
12510 JJ high 246 465 114390 141 0.245233 0.013880 40.763012
9133 RB especially 213 522 111186 130 0.280508 0.013277 40.761148
22669 NN satisfaction 168 641 107688 114 0.337547 0.012692 40.760650
23501 JJ short 233 485 113005 137 0.257731 0.013594 40.758889
3374 JJ bright 228 494 112632 132 0.273878 0.014627 40.721521
9313 IN except 177 616 109032 119 0.318905 0.012641 40.720122
25317 RB suddenly 287 407 116809 150 0.218360 0.014280 40.713335
17258 VBP need 319 377 120263 159 0.193055 0.014783 40.712745
13885 NN interest 282 415 117030 145 0.233084 0.015143 40.672142
6381 VBD cut 218 511 111398 130 0.280508 0.012954 40.660302
20990 VBD ran 254 455 115570 142 0.242163 0.014389 40.654382
17846 RB often 356 330 117480 168 0.169142 0.013646 40.645111
7855 VBG doing 240 475 114000 135 0.264118 0.014233 40.630317
23648 NN sight 267 431 115077 148 0.224190 0.013172 40.626689
11478 VBN gone 335 357 119595 159 0.193055 0.015586 40.614491
17369 JJ next 387 309 119583 172 0.158923 0.014021 40.603242
1256 JJ anxious 232 486 112752 132 0.273878 0.015090 40.577229
14840 JJ late 238 478 113764 135 0.264118 0.014172 40.564354
22490 JJ sad 146 730 106580 109 0.357025 0.012841 40.549052
24983 VBD stood 476 267 127092 197 0.099985 0.011433 40.546933
10410 VBD followed 224 498 111552 132 0.273878 0.014177 40.544219
16333 VBD met 164 653 107092 110 0.353059 0.013160 40.535228
17296 DT neither 167 643 107381 113 0.341373 0.013819 40.529700
11320 VBG giving 164 656 107584 112 0.345234 0.012053 40.521688
26169 PRP themselves 259 445 115255 147 0.227134 0.012990 40.521413
24536 NN spite 245 467 114415 137 0.257731 0.014241 40.514991
3639 NN burst 132 808 106656 104 0.377418 0.012156 40.513939
14870 VBD laughed 351 340 119340 162 0.184937 0.014950 40.499623
2468 IN behind 354 336 118944 166 0.174344 0.014123 40.491627
11227 VBG getting 297 399 118503 150 0.218360 0.014225 40.489958
26973 NN trouble 260 443 115180 140 0.248324 0.014623 40.486869
25141 JJ strong 269 428 115132 147 0.227134 0.013764 40.483346
16491 NN minute 395 305 120475 178 0.144032 0.012957 40.466904
14976 VBD led 186 592 110112 121 0.311666 0.012819 40.432386
22672 JJ satisfied 167 645 107715 114 0.337547 0.013063 40.421110
8914 RB enough 433 290 125570 176 0.148939 0.014656 40.375696
6862 NN delight 166 648 107568 111 0.349129 0.014193 40.359998
21120 JJ real 230 490 112700 133 0.270600 0.013957 40.352634
23164 VBD sent 235 483 113505 129 0.283862 0.015635 40.344609
24025 VBD smiled 146 732 106872 106 0.369146 0.012797 40.342451
10536 VB forget 231 488 112728 131 0.277180 0.014667 40.334386
23054 VBG seeing 178 609 108402 114 0.337547 0.013560 40.331735
3393 VB bring 249 462 115038 138 0.254573 0.014214 40.330908
8340 JJ early 187 589 110143 117 0.326266 0.013827 40.326784
26446 NNS times 301 395 118895 154 0.206931 0.013623 40.320339
19521 JJ pleased 220 507 111540 125 0.297542 0.014840 40.312398
947 RB almost 202 558 112716 124 0.301030 0.013199 40.306497
12264 VB hear 355 332 117860 157 0.198552 0.016450 40.300014
14525 VBD kept 389 308 119812 165 0.176968 0.015884 40.290441
23534 VB show 351 341 119691 162 0.184937 0.014958 40.285910
23285 JJ several 353 337 118961 162 0.184937 0.014210 40.272354
2499 VB believe 317 379 120143 153 0.209760 0.014038 40.256824
24422 VB speak 248 463 114824 134 0.267347 0.014622 40.243809
11314 VBN given 270 427 115290 142 0.242163 0.014405 40.228508
9851 VB feel 452 280 126560 187 0.122610 0.012990 40.227604
23974 RB slowly 152 705 107160 106 0.369146 0.013042 40.225733
5815 NN corner 195 572 111540 116 0.329994 0.014800 40.198055
18620 VBD passed 205 550 112750 122 0.308092 0.014769 40.195698
22725 VBG saying 418 297 124146 172 0.158923 0.015313 40.180512
19525 NN pleasure 232 487 112984 125 0.297542 0.015821 40.166538
18374 NN pair 186 593 110298 114 0.337547 0.014796 40.157696
18108 MD ought 273 425 116025 134 0.267347 0.017093 40.151251
17499 NN none 136 783 106488 101 0.390130 0.012633 40.145026
23729 IN since 354 334 118236 160 0.190332 0.015106 40.130726
18390 JJ pale 153 699 106947 105 0.373262 0.014274 40.127088
28573 JJ warm 140 754 105560 102 0.385852 0.013208 40.111028
29128 VBP wish 417 298 124266 167 0.171735 0.016179 40.076606
23644 NN sigh 120 877 105240 95 0.416728 0.012154 40.072048
29266 NN world 493 262 129166 194 0.106650 0.011882 40.056827
3671 JJ busy 243 473 114939 128 0.287242 0.016474 40.052363
12095 RB hardly 140 758 106120 102 0.385852 0.012202 40.052001
9865 VBD fell 430 291 125130 178 0.144032 0.014612 40.048955
18101 NNS others 323 368 118864 153 0.209760 0.014350 40.041039
21128 RB really 389 307 119423 169 0.166565 0.014395 40.031381
25316 JJ sudden 253 459 116127 133 0.270600 0.016176 40.025148
27425 VB understand 208 537 111696 121 0.311666 0.014152 40.020386
10071 JJ fine 385 310 119350 166 0.174344 0.015618 40.014417
23252 VBN set 499 261 130239 191 0.113418 0.012745 40.001146
23998 JJ small 444 286 126984 178 0.144032 0.014304 39.999989
1612 VB ask 289 405 117045 139 0.251437 0.016435 39.998588

Save Work

In [123]:
VOCAB.to_csv('VOCAB2.csv')
TOKEN.to_csv('TOKEN2.csv')
BOW.to_csv('DOC2.csv')
DTCM.to_csv('DTCM.csv')
TFIDF.to_csv('TFIDF.csv')
#SIGS.to_csv('SIGS.csv')
#WCM.to_csv('WCM.csv')
# BOW.to_csv('BOW.csv')

Similarity and Distance Measures

Config

We define two OHCO lists, one to match the reduced TFIDF table we are importing, and the other to define the table after we compress this table to make clustering easier.

In [124]:
OHCO_src = ['book_id', 'chap_num']
OHCO = ['book_id']

Import

In [125]:
import pandas as pd
import numpy as np
import re 
from numpy.linalg import norm
from scipy.spatial.distance import pdist
In [126]:
import seaborn as sns
sns.set(style="ticks")

%matplotlib inline

Prepare the Data

Import tables

In [127]:
TFIDF = pd.read_csv('TFIDF.csv').set_index(OHCO_src)
VOCAB = pd.read_csv('VOCAB.csv').set_index('term_id')
LIB = pd.read_csv('LIBRARY.csv').set_index('book_id')
In [128]:
TFIDF.head()
Out[128]:
1 2 3 4 5 6 7 8 9 10 ... 29549 29550 29551 29552 29553 29554 29555 29556 29557 29558
book_id chap_num
1 1 0.000000 0.0 0.0 0.0 0.000112 0.0 0.0 0.0 0.0 0.000126 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.0 0.0
2 0.000174 0.0 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.000263 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.0 0.0
3 0.000000 0.0 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.000644 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.0 0.0
4 0.000000 0.0 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.000000 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.0 0.0
5 0.000000 0.0 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.000089 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000118 0.0 0.0

5 rows × 29556 columns

Add Some Labels to the Library

In [129]:
LIB.loc[LIB.book_title.str.contains('Alcott'), 'author'] = 'Alcott'
LIB['author'] = LIB['author'].fillna('Poe')
LIB['title'] = LIB.book_title.str.split(', by').apply(lambda x: x[0])
LIB.columns = ['book_id','book_title','book_file','author','title']
In [130]:
LIB
Out[130]:
book_id book_title book_file author title
book_id
2787 An Old - fashioned Girl, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_AN_OLD-FASHIONED_GIRL... 2787 Alcott An Old - fashioned Girl
2726 Eight Cousins, by Louisa M. Alcott epubs_\ALCOTT_MAY_LOUISA_EIGHT_COUSINS-pg2726.txt 2726 Alcott Eight Cousins
2786 Jack and Jill, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_JACK_AND_JILL-pg2786.txt 2786 Alcott Jack and Jill
3499 Jo's Boys, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_JO’S_BOYS-pg3499.txt 3499 Alcott Jo's Boys
2788 Little Men, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_LITTLE_MEN-pg2788.txt 2788 Alcott Little Men
514 Little Women, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_LITTLE_WOMEN-pg514.txt 514 Alcott Little Women
2804 Rose in Bloom, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_ROSE_IN_BLOOM-pg2804.txt 2804 Alcott Rose in Bloom
3795 Under the Lilacs, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_UNDER_THE_LILACS-pg37... 3795 Alcott Under the Lilacs
1 The Works of Edgar Allan Poe Volume 1 (of 5) o... epubs/2147-0.txt 1 Poe The Works of Edgar Allan Poe Volume 1 (of 5) o...
2 The Works of Edgar Allan Poe Volume 2 (of 5) o... epubs/2148-0.txt 2 Poe The Works of Edgar Allan Poe Volume 2 (of 5) o...
3 The Works of Edgar Allan Poe Volume 3 (of 5) o... epubs/2149-0.txt 3 Poe The Works of Edgar Allan Poe Volume 3 (of 5) o...
4 The Works of Edgar Allan Poe Volume 4 (of 5) o... epubs/2150-0.txt 4 Poe The Works of Edgar Allan Poe Volume 4 (of 5) o...
5 The Works of Edgar Allan Poe Volume 5 (of 5) o... epubs/2151-0.txt 5 Poe The Works of Edgar Allan Poe Volume 5 (of 5) o...

Collapse TFIDF

We want to work with larger bags in this notebook, in order to better visualize our resulting clusters.

In [131]:
TFIDF = TFIDF.groupby(OHCO).mean()
In [132]:
TFIDF
Out[132]:
1 2 3 4 5 6 7 8 9 10 ... 29549 29550 29551 29552 29553 29554 29555 29556 29557 29558
book_id
1 0.000022 0.000000 0.000000 0.000000 0.000014 0.000000 0.000000 0.000000 0.000000 0.000184 ... 0.00010 0.000057 0.000057 0.000000 0.000000 0.000000 0.000000 0.000015 0.000000 0.000000
2 0.000000 0.000000 0.000023 0.000023 0.000000 0.000000 0.000000 0.000000 0.000000 0.000097 ... 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
3 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
4 0.000000 0.000034 0.000000 0.000000 0.000000 0.000034 0.000034 0.000000 0.000000 0.000157 ... 0.00001 0.000000 0.000000 0.000077 0.000077 0.000012 0.000012 0.000000 0.000012 0.000041
5 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000097 ... 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000054
514 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
2726 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000013 0.000013 0.000012 ... 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
2786 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000011 0.000011 0.000019 ... 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
2787 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
2788 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
2804 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000018 0.000018 0.000017 ... 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
3499 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
3795 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000061 ... 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000

13 rows × 29556 columns

Create a DOC table

We want to create a new table that maps the OHCO levels to a single doc_id. We do this so that when we create a table to store pairs of docs and their distances, we can use a single-valued ID for each docs.

This table will also be used to store cluster assignments.

All of this will become clearer below!

In [133]:
DOC = TFIDF.reset_index()[OHCO] # We create a table from the OHCO in our TFIDF table
In [134]:
DOC.head()
Out[134]:
book_id
0 1
1 2
2 3
3 4
4 5
In [135]:
DOC.index.name = 'doc_id' # We give the new index a name
In [136]:
DOC.head()
Out[136]:
book_id
doc_id
0 1
1 2
2 3
3 4
4 5

Add a meaningful Title to DOC index

In [137]:
DOC['title'] = DOC.book_id.map(LIB.author) \
    + '-' + DOC[OHCO].apply(lambda x: x.astype('str').str.cat(sep='-'), 1) \
    + ': '+ DOC.book_id.map(LIB.title) 
In [138]:
DOC
Out[138]:
book_id title
doc_id
0 1 Poe-1: The Works of Edgar Allan Poe Volume 1 (...
1 2 Poe-2: The Works of Edgar Allan Poe Volume 2 (...
2 3 Poe-3: The Works of Edgar Allan Poe Volume 3 (...
3 4 Poe-4: The Works of Edgar Allan Poe Volume 4 (...
4 5 Poe-5: The Works of Edgar Allan Poe Volume 5 (...
5 514 Alcott-514: Little Women
6 2726 Alcott-2726: Eight Cousins
7 2786 Alcott-2786: Jack and Jill
8 2787 Alcott-2787: An Old - fashioned Girl
9 2788 Alcott-2788: Little Men
10 2804 Alcott-2804: Rose in Bloom
11 3499 Alcott-3499: Jo's Boys
12 3795 Alcott-3795: Under the Lilacs

Create Normalized Tables

In [139]:
L0 = TFIDF.astype('bool').astype('int')
L1 = TFIDF.apply(lambda x: x / x.sum(), 1)
L2 = TFIDF.apply(lambda x: x / norm(x), 1)
In [140]:
# ((L2.T)**2).sum()

Create Doc Pair Table

Create a table to store our results.

Note that pist() is a "distance matrix computation from a collection of raw observation vectors stored in a rectangular array".

In [141]:
PAIRS = pd.DataFrame(index=pd.MultiIndex.from_product([DOC.index.tolist(), DOC.index.tolist()])).reset_index()
PAIRS = PAIRS[PAIRS.level_0 < PAIRS.level_1].set_index(['level_0','level_1'])
PAIRS.index.names = ['doc_a', 'doc_b']
In [142]:
PAIRS.shape
Out[142]:
(78, 0)
In [143]:
PAIRS.head()
Out[143]:
doc_a doc_b
0 1
2
3
4
5

Compute Distances

In [144]:
%time PAIRS['cityblock'] = pdist(TFIDF, 'cityblock')
Wall time: 6.98 ms
In [145]:
%time PAIRS['euclidean'] = pdist(TFIDF, 'euclidean')
Wall time: 7.98 ms
In [146]:
%time PAIRS['cosine'] = pdist(TFIDF, 'cosine')
Wall time: 7.98 ms
In [147]:
%time PAIRS['jaccard'] = pdist(L0, 'jaccard') # Fast, and similar to js
Wall time: 20.9 ms
In [148]:
%time PAIRS['dice'] = pdist(L0, 'dice')
Wall time: 4.99 ms
In [149]:
# %time PAIRS['js'] = pdist(L1, 'jensenshannon') # Turns out to be really slow                               
In [150]:
%time PAIRS['euclidean2'] = pdist(L2, 'euclidean') # Should be the same as cosine (colinear)
Wall time: 4.99 ms
In [151]:
%time PAIRS['js'] = pdist(TFIDF, 'jensenshannon')
Wall time: 28.9 ms
In [152]:
import scipy
scipy.__version__
Out[152]:
'1.3.2'
In [153]:
PAIRS.head()
Out[153]:
cityblock euclidean cosine jaccard dice euclidean2 js
doc_a doc_b
0 1 0.529754 0.007972 0.599803 0.641180 0.471865 1.095265 0.582456
2 0.611615 0.011209 0.757811 0.712373 0.553245 1.231106 0.646053
3 0.582552 0.008748 0.676188 0.656640 0.488804 1.162917 0.610223
4 0.645247 0.011271 0.782488 0.702160 0.541023 1.250990 0.651606
5 0.576153 0.011543 0.919940 0.737200 0.583782 1.356422 0.695814

Compare Distributions

In [154]:
if PAIRS.shape[0] > 1000:
    SAMPLE = PAIRS.sample(1000)
else:
    SAMPLE = PAIRS
In [155]:
sns.pairplot(SAMPLE)
Out[155]:
<seaborn.axisgrid.PairGrid at 0x2470c600fd0>
In [156]:
PAIRS.sort_values('cosine').head(20).style.background_gradient('YlGn', high=1)
Out[156]:
cityblock euclidean cosine jaccard dice euclidean2 js
doc_a doc_b
6 10 0.279733 0.004397 0.203373 0.571110 0.399688 0.637767 0.506003
9 11 0.320440 0.007974 0.408351 0.582797 0.411230 0.903716 0.527273
0 1 0.529754 0.007972 0.599803 0.641180 0.471865 1.095265 0.582456
5 11 0.325244 0.008646 0.604417 0.587026 0.415454 1.099470 0.538363
1 3 0.533722 0.007259 0.611246 0.632314 0.462324 1.105664 0.583025
5 9 0.309606 0.010875 0.665723 0.565483 0.394197 1.153883 0.539526
1 2 0.552469 0.009888 0.674817 0.705388 0.544864 1.161737 0.610005
0 3 0.582552 0.008748 0.676188 0.656640 0.488804 1.162917 0.610223
1 4 0.606536 0.010094 0.729115 0.707712 0.547643 1.207572 0.633531
2 3 0.614765 0.010604 0.749064 0.723946 0.567331 1.223980 0.641860
3 4 0.633443 0.010484 0.749497 0.704986 0.544384 1.224334 0.640562
6 11 0.342550 0.008149 0.754548 0.599891 0.428461 1.228452 0.561977
10 11 0.321504 0.008077 0.757095 0.587838 0.416268 1.230524 0.547727
0 2 0.611615 0.011209 0.757811 0.712373 0.553245 1.231106 0.646053
4 0.645247 0.011271 0.782488 0.702160 0.541023 1.250990 0.651606
6 7 0.313452 0.009562 0.785452 0.548156 0.377559 1.253357 0.538337
7 11 0.341281 0.009285 0.798321 0.587712 0.416141 1.263583 0.555243
10 0.324154 0.009572 0.799013 0.577875 0.406346 1.264131 0.553234
5 6 0.323584 0.010286 0.820897 0.593285 0.421752 1.281325 0.559031
2 4 0.659703 0.012709 0.824429 0.709710 0.550039 1.284079 0.670374
In [157]:
# def get_comps(doc_id):
#     idx = pd.IndexSlice
#     a = PAIRS.loc[idx[doc_id, :], :]
#     b = PAIRS.loc[idx[:, doc_id], :]
#     return pd.concat([a,b])

Create Clusters

Hiearchical

In [158]:
import scipy.cluster.hierarchy as sch
import matplotlib.pyplot as plt
In [159]:
def hca(sims, linkage_method='ward', color_thresh=.3, figsize=(10, 10)):
    tree = sch.linkage(sims, method=linkage_method)
    labels = DOC.title.values
    plt.figure()
    fig, axes = plt.subplots(figsize=figsize)
    dendrogram = sch.dendrogram(tree, 
                                labels=labels, 
                                orientation="left", 
                                count_sort=True,
                                distance_sort=True,
                                above_threshold_color='.75',
                                color_threshold=color_thresh
                               )
    plt.tick_params(axis='both', which='major', labelsize=14)
In [160]:
hca(PAIRS.cosine, color_thresh=1)
<Figure size 432x288 with 0 Axes>
In [161]:
hca(PAIRS.jaccard, color_thresh=.6)
<Figure size 432x288 with 0 Axes>
In [162]:
hca(PAIRS.euclidean, color_thresh=.3)
<Figure size 432x288 with 0 Axes>
In [163]:
hca(PAIRS.cityblock, color_thresh=8)
<Figure size 432x288 with 0 Axes>
In [164]:
hca(PAIRS.js, color_thresh=.6)
<Figure size 432x288 with 0 Axes>

K-Means

K-Means only uses Euclidean distance. Why?

  • The K-Means procedure does not explicitly use pairwise distances between data points.
  • Instead, it repeatedly assigns points to the closest centroid thereby using Euclidean distance from data points to a centroid.
  • However, K-Means is implicitly based on pairwise Euclidean distances between data points, because the sum of squared deviations from centroid is equal to the sum of pairwise squared Euclidean distances divided by the number of points.
  • The term "centroid" is itself from Euclidean geometry. It is multivariate mean in Euclidean space. Euclidean space is about euclidean distances. Non-Euclidean distances will generally not span Euclidean space. That's why K-Means is for Euclidean distances only.

See the Cross Validated post on this.

In [165]:
from sklearn.cluster import KMeans
In [166]:
n_clusters = 4
In [167]:
DOC['y_raw'] = KMeans(n_clusters).fit_predict(TFIDF)
DOC['y_L0'] = KMeans(n_clusters).fit_predict(L0)
DOC['y_L1'] = KMeans(n_clusters).fit_predict(L1)
DOC['y_L2'] = KMeans(n_clusters).fit_predict(L2)
In [168]:
DOC.sort_values('y_raw').style.background_gradient(cmap='YlGn', high=1)
Out[168]:
book_id title y_raw y_L0 y_L1 y_L2
doc_id
12 3795 Alcott-3795: Under the Lilacs 0 0 3 3
5 514 Alcott-514: Little Women 1 0 0 0
6 2726 Alcott-2726: Eight Cousins 1 0 0 3
7 2786 Alcott-2786: Jack and Jill 1 0 0 0
9 2788 Alcott-2788: Little Men 1 0 2 0
10 2804 Alcott-2804: Rose in Bloom 1 0 0 3
11 3499 Alcott-3499: Jo's Boys 1 0 0 0
8 2787 Alcott-2787: An Old - fashioned Girl 2 0 1 2
0 1 Poe-1: The Works of Edgar Allan Poe Volume 1 (of 5) of the Raven Edition 3 1 0 1
1 2 Poe-2: The Works of Edgar Allan Poe Volume 2 (of 5) of the Raven Edition 3 2 0 1
2 3 Poe-3: The Works of Edgar Allan Poe Volume 3 (of 5) of the Raven Edition 3 2 0 1
3 4 Poe-4: The Works of Edgar Allan Poe Volume 4 (of 5) of the Raven Edition 3 3 0 1
4 5 Poe-5: The Works of Edgar Allan Poe Volume 5 (of 5) of the Raven Edition 3 2 0 1

Save Work

In [169]:
DOC.to_csv('DOC3.csv')
LIB.to_csv('LIB3.csv')
TFIDF.to_csv('TFIDF_book_final.csv')

Principal Component Analysis

In [170]:
OHCO = ['book_id', 'chap_num']
In [171]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from scipy.linalg import norm
In [172]:
import plotly_express as px
import seaborn as sns
In [173]:
sns.set(style='ticks')
%matplotlib inline
In [174]:
LIB = pd.read_csv('LIB3.csv').set_index('book_id')
VOCAB = pd.read_csv('VOCAB2.csv').set_index('term_id')
TFIDF = pd.read_csv('TFIDF.csv').set_index(OHCO)
In [175]:
TFIDF.shape
Out[175]:
(248, 29556)

Normalize doc vector lengths

We use L2 normalization

In [176]:
# TFIDF = TFIDF.apply(lambda x: x / norm(x, 2), 1)
In [177]:
TFIDF = TFIDF.apply(lambda x: x / np.sqrt(np.square(x).sum()), 1)

Normalize term vector variance

We do not normalize variance, which we would normally do, such as with data containing divergent units of measure. \ This is because to do so would exaggerate the importance of rare words (see Ng, 2008: 6m40s — 8m00s).

Center the word vectors

Note that we are taking the column-wise means -- the means for the term vectors. \ We don't really need to do this. But it is typical for PCA. \ NOTE: Some argue that centering alters the cosine angles.

In [178]:
TFIDF = TFIDF - TFIDF.mean()

Compute Covariance Matrix

$n = |X| = |Y|$

$Cov(X,Y) = \dfrac{\sum_{i=1}^{n} (x_i - \mu_X) (y_i - \mu_Y)}{n - 1} = \dfrac{XY}{n-1}$

We could we use the built in Pandas method here, but compute it ourselves.

In [179]:
# COV = TFIDF.cov() # This also centers the vectors
COV = TFIDF.T.dot(TFIDF) / (TFIDF.shape[0] - 1)
In [180]:
COV.head()
Out[180]:
1 2 3 4 5 6 7 8 9 10 ... 29549 29550 29551 29552 29553 29554 29555 29556 29557 29558
1 3.759325e-07 -5.998677e-09 -4.502953e-09 -4.502953e-09 -1.493126e-09 -5.998677e-09 -5.998677e-09 -4.176768e-09 -4.176768e-09 4.916907e-07 ... -7.653323e-09 -3.180492e-09 -3.180492e-09 -7.837017e-09 -7.837017e-09 -2.392873e-09 -2.392873e-09 -1.181405e-09 -2.392873e-09 -1.094845e-08
2 -5.998677e-09 5.839759e-06 -1.774761e-08 -1.774761e-08 -5.884898e-09 5.839759e-06 5.839759e-06 -1.646201e-08 -1.646201e-08 1.904993e-06 ... -3.016425e-08 -1.253536e-08 -1.253536e-08 -3.088825e-08 -3.088825e-08 -9.431096e-09 -9.431096e-09 -4.656305e-09 -9.431096e-09 -4.315144e-08
3 -4.502953e-09 -1.774761e-08 3.290628e-06 3.290628e-06 -4.417544e-09 -1.774761e-08 -1.774761e-08 -1.235733e-08 -1.235733e-08 6.000546e-06 ... -2.264303e-08 -9.409764e-09 -9.409764e-09 -2.318650e-08 -2.318650e-08 -7.079525e-09 -7.079525e-09 -3.495291e-09 -7.079525e-09 -3.239196e-08
4 -4.502953e-09 -1.774761e-08 3.290628e-06 3.290628e-06 -4.417544e-09 -1.774761e-08 -1.774761e-08 -1.235733e-08 -1.235733e-08 6.000546e-06 ... -2.264303e-08 -9.409764e-09 -9.409764e-09 -2.318650e-08 -2.318650e-08 -7.079525e-09 -7.079525e-09 -3.495291e-09 -7.079525e-09 -3.239196e-08
5 -1.493126e-09 -5.884898e-09 -4.417544e-09 -4.417544e-09 3.618070e-07 -5.884898e-09 -5.884898e-09 -4.097546e-09 -4.097546e-09 3.347574e-07 ... -7.508161e-09 -3.120167e-09 -3.120167e-09 -7.688370e-09 -7.688370e-09 -2.347487e-09 -2.347487e-09 -1.158997e-09 -2.347487e-09 -1.074079e-08

5 rows × 29556 columns

In [181]:
COV.iloc[:5,:10].style.background_gradient()
Out[181]:
1 2 3 4 5 6 7 8 9 10
1 0.000000 -0.000000 -0.000000 -0.000000 -0.000000 -0.000000 -0.000000 -0.000000 -0.000000 0.000000
2 -0.000000 0.000006 -0.000000 -0.000000 -0.000000 0.000006 0.000006 -0.000000 -0.000000 0.000002
3 -0.000000 -0.000000 0.000003 0.000003 -0.000000 -0.000000 -0.000000 -0.000000 -0.000000 0.000006
4 -0.000000 -0.000000 0.000003 0.000003 -0.000000 -0.000000 -0.000000 -0.000000 -0.000000 0.000006
5 -0.000000 -0.000000 -0.000000 -0.000000 0.000000 -0.000000 -0.000000 -0.000000 -0.000000 0.000000

Decompose the Matrix

There a at least three options to choose from. We go with SciPy's Hermitian Eigendecomposition \ method eigh(), since our covarience matrix is symmetric.

In [182]:
from scipy.linalg import eigh
In [183]:
%time eig_vals, eig_vecs = eigh(COV)
Wall time: 38min 7s

Convert eigen data to dataframes

In [184]:
TERM_IDX = COV.index
In [185]:
EIG_VEC = pd.DataFrame(eig_vecs, index=TERM_IDX, columns=TERM_IDX)
In [186]:
EIG_VAL = pd.DataFrame(eig_vals, index=TERM_IDX, columns=['eig_val'])
EIG_VAL.index.name = 'term_id'
In [187]:
EIG_VEC.iloc[:5, :10].style.background_gradient()
Out[187]:
term_id 1 2 3 4 5 6 7 8 9 10
term_id
1 0.000190 -0.001031 -0.000304 -0.000409 -0.000115 -0.000882 0.000172 -0.000482 0.000705 0.000537
2 0.050576 -0.001631 -0.017232 0.050186 -0.004421 -0.006207 -0.016342 -0.041056 0.080598 0.024476
3 -0.251753 -0.438206 -0.160220 -0.297737 0.218325 -0.217058 0.075961 0.031302 0.078241 0.029414
4 0.527026 -0.456357 0.027362 0.023019 -0.124215 0.060804 -0.069133 0.047233 -0.110051 -0.025309
5 -0.508285 -0.242777 0.262401 0.130595 -0.323529 -0.142930 -0.171910 -0.129455 0.028169 0.015121
In [188]:
EIG_VAL.iloc[:5]
Out[188]:
eig_val
term_id
1 -1.677703e-16
2 -1.053346e-16
3 -7.552308e-17
4 -6.765879e-17
5 -6.184650e-17

Select Principal Components

Next, we associate each eigenvalue with its corresponding column in the eigenvalue matrix. \ This is why we transpose the EIG_VEC dataframe.

Combine eigenvalues and eignvectors

In [189]:
EIG_PAIRS = EIG_VAL.join(EIG_VEC.T)
In [190]:
EIG_PAIRS.head()
Out[190]:
eig_val 1 2 3 4 5 6 7 8 9 ... 29549 29550 29551 29552 29553 29554 29555 29556 29557 29558
term_id
1 -1.677703e-16 0.000190 0.050576 -0.251753 0.527026 -0.508285 0.428594 -0.239088 0.061406 -0.002834 ... 0.000362 -0.000065 -0.000066 0.000383 -0.000073 0.000050 -0.000034 -0.000136 0.000084 0.000143
2 -1.053346e-16 -0.001031 -0.001631 -0.438206 -0.456357 -0.242777 0.159917 0.033440 -0.069839 -0.001033 ... 0.001012 -0.000332 -0.000327 0.000570 0.000857 0.000218 0.000354 -0.000170 0.000242 0.000354
3 -7.552308e-17 -0.000304 -0.017232 -0.160220 0.027362 0.262401 -0.205032 -0.398608 0.293331 0.136964 ... 0.000506 0.000581 0.000528 0.000188 0.000176 0.000148 0.000014 -0.000157 -0.000029 0.000796
4 -6.765879e-17 -0.000409 0.050186 -0.297737 0.023019 0.130595 -0.080832 -0.008371 0.244443 0.058025 ... 0.000134 -0.000367 -0.000422 -0.000012 -0.000009 -0.000336 -0.000315 -0.000287 -0.000396 0.000387
5 -6.184650e-17 -0.000115 -0.004421 0.218325 -0.124215 -0.323529 -0.200387 -0.031553 0.464226 0.169672 ... -0.000131 -0.000459 -0.000463 -0.000195 -0.000245 0.000245 0.000235 0.000227 0.000307 -0.000142

5 rows × 29557 columns

Next, we sort in descending order and pick the top K (=10).

Compute and Show Explained Variance

We might have usd this value to sort our components.

In [191]:
EIG_PAIRS['exp_var'] = np.round((EIG_PAIRS.eig_val / EIG_PAIRS.eig_val.sum()) * 100, 2)
In [192]:
EIG_PAIRS.exp_var.sort_values(ascending=False).head().plot.bar(rot=45)
Out[192]:
<matplotlib.axes._subplots.AxesSubplot at 0x247057f6630>

Pick Top K (10) Components

We pick these based on explained variance.

In [193]:
COMPS = EIG_PAIRS.sort_values('exp_var', ascending=False).head(10).reset_index(drop=True)
COMPS.index.name = 'comp_id'
COMPS.index = ["PC{}".format(i) for i in COMPS.index.tolist()]
In [194]:
COMPS
Out[194]:
eig_val 1 2 3 4 5 6 7 8 9 ... 29550 29551 29552 29553 29554 29555 29556 29557 29558 exp_var
PC0 0.050096 0.000028 0.000069 0.000090 0.000090 3.489732e-05 0.000069 0.000069 0.000099 0.000099 ... 0.000055 0.000055 0.000113 0.000113 0.000047 0.000047 0.000021 0.000047 0.000196 5.34
PC1 0.039452 -0.000052 -0.000191 -0.000168 -0.000168 -6.636143e-05 -0.000191 -0.000191 -0.000254 -0.000254 ... -0.000111 -0.000111 -0.000251 -0.000251 -0.000088 -0.000088 -0.000046 -0.000088 -0.000404 4.20
PC2 0.036098 0.000009 0.000035 0.000041 0.000041 2.256769e-05 0.000035 0.000035 0.000132 0.000132 ... 0.000021 0.000021 0.000029 0.000029 0.000032 0.000032 0.000015 0.000032 0.000085 3.85
PC3 0.029467 0.000154 0.000548 0.000495 0.000495 2.016357e-04 0.000548 0.000548 0.000450 0.000450 ... 0.000356 0.000356 0.000590 0.000590 0.000259 0.000259 0.000134 0.000259 0.001111 3.14
PC4 0.026793 -0.000068 -0.000234 -0.000228 -0.000228 -9.533447e-05 -0.000234 -0.000234 -0.000326 -0.000326 ... -0.000148 -0.000148 -0.000347 -0.000347 -0.000135 -0.000135 -0.000068 -0.000135 -0.000550 2.85
PC5 0.023343 -0.000327 -0.001058 -0.001078 -0.001078 -4.862476e-04 -0.001058 -0.001058 -0.000743 -0.000743 ... -0.000743 -0.000743 -0.001283 -0.001283 -0.000592 -0.000592 -0.000307 -0.000592 -0.002542 2.49
PC6 0.015340 0.000129 0.000281 0.000285 0.000285 2.311249e-04 0.000281 0.000281 -0.006631 -0.006631 ... 0.000211 0.000211 0.000329 0.000329 0.000251 0.000251 0.000130 0.000251 0.001117 1.63
PC7 0.012568 -0.000085 -0.000046 -0.000210 -0.000210 -1.387821e-04 -0.000046 -0.000046 0.000178 0.000178 ... -0.000198 -0.000198 -0.000039 -0.000039 -0.000111 -0.000111 -0.000065 -0.000111 -0.000523 1.34
PC8 0.010517 0.000015 -0.000148 -0.000081 -0.000081 -1.883173e-07 -0.000148 -0.000148 -0.000195 -0.000195 ... 0.000070 0.000070 -0.000136 -0.000136 -0.000040 -0.000040 -0.000002 -0.000040 -0.000140 1.12
PC9 0.008898 -0.000112 0.000031 -0.000189 -0.000189 -1.880461e-04 0.000031 0.000031 -0.000031 -0.000031 ... -0.000292 -0.000292 -0.000127 -0.000127 -0.000133 -0.000133 -0.000148 -0.000133 -0.000351 0.95

10 rows × 29558 columns

Inspect terms associated with eigenvectors

In [195]:
VOCAB.loc[[int(x) for x in EIG_PAIRS.sort_values('exp_var', ascending=False).head(10).index], 'term_str']
Out[195]:
term_id
29558          être
29557           été
29556       émeutes
29555         élite
29554      échapper
29553    æstheticus
29552      æschylus
29551    ærostation
29550      æronauts
29549       æronaut
Name: term_str, dtype: object

Show Loadings

Loadings sow the contribution of each term to the component. \ We'll just look at the topi 10 words for the first two components in the Book version.

In [196]:
LOADINGS = COMPS[TERM_IDX].T
LOADINGS.index.name = 'term_id'
In [197]:
LOADINGS.head().style.background_gradient()
Out[197]:
PC0 PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9
term_id
1 0.000028 -0.000052 0.000009 0.000154 -0.000068 -0.000327 0.000129 -0.000085 0.000015 -0.000112
2 0.000069 -0.000191 0.000035 0.000548 -0.000234 -0.001058 0.000281 -0.000046 -0.000148 0.000031
3 0.000090 -0.000168 0.000041 0.000495 -0.000228 -0.001078 0.000285 -0.000210 -0.000081 -0.000189
4 0.000090 -0.000168 0.000041 0.000495 -0.000228 -0.001078 0.000285 -0.000210 -0.000081 -0.000189
5 0.000035 -0.000066 0.000023 0.000202 -0.000095 -0.000486 0.000231 -0.000139 -0.000000 -0.000188
In [198]:
LOADINGS['term_str'] = LOADINGS.apply(lambda x: VOCAB.loc[int(x.name)].term_str, 1)
In [199]:
l0_pos = LOADINGS.sort_values('PC0', ascending=True).head(10).term_str.str.cat(sep=' ')
l0_neg = LOADINGS.sort_values('PC0', ascending=False).head(10).term_str.str.cat(sep=' ')
l1_pos = LOADINGS.sort_values('PC1', ascending=True).head(10).term_str.str.cat(sep=' ')
l1_neg = LOADINGS.sort_values('PC1', ascending=False).head(10).term_str.str.cat(sep=' ')
In [200]:
print('Books PC0+', l0_pos)
print('Books PC0-', l0_neg)
print('Books PC1+', l1_pos)
print('Books PC1-', l1_neg)
Books PC0+ polly tom fanny maud fan shaw trix sydney grandma nt
Books PC0- jo ben meg dan rose mac laurie beth phebe bab
Books PC1+ ben bab celia thorny betty mac rose phebe sancho sanch
Books PC1- jo meg laurie dan beth amy march nat bhaer demi

Project Docs onto New Subspace

We get the dot product of the DTM matrix and the new component matrix, which we will call DCM -- for document-component matrix. \ This has the effect of replacing the features of the DTM with the features of the transposed component matrix.

In [201]:
DCM = TFIDF.dot(COMPS[TERM_IDX].T)
In [202]:
DCM
Out[202]:
PC0 PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9
book_id chap_num
1 1 0.045586 -0.068268 0.021243 0.154930 -0.066606 -0.295966 0.092448 -0.045482 -0.000052 -0.043629
2 0.036468 -0.052018 0.007954 0.116133 -0.046357 -0.195441 0.050429 -0.027351 0.004047 -0.025503
3 0.031147 -0.054016 0.006790 0.103811 -0.048130 -0.174429 0.038767 -0.013583 -0.005808 -0.001074
4 0.038437 -0.061656 0.016392 0.134503 -0.061723 -0.256758 0.085044 -0.032434 0.002938 -0.054203
5 0.034846 -0.060136 0.017363 0.130076 -0.060269 -0.236491 0.065547 -0.026885 -0.000811 -0.043410
... ... ... ... ... ... ... ... ... ... ... ...
3795 20 0.099089 -0.425193 -0.435227 -0.217947 0.045435 0.081270 0.018139 -0.001406 0.001315 0.030075
21 0.074833 -0.294456 -0.290467 -0.129541 0.024497 0.043422 0.025105 0.004605 0.004458 0.018747
22 0.086546 -0.360335 -0.361546 -0.169126 0.033315 0.056944 0.015199 -0.011090 -0.005427 -0.012233
23 0.103214 -0.451264 -0.475770 -0.251713 0.058225 0.107191 0.009404 -0.011621 -0.007712 0.014169
24 0.056451 -0.178924 -0.106545 0.053196 -0.053017 -0.164683 -0.932541 0.018355 -0.024794 -0.007313

248 rows × 10 columns

We add metadata to our new, reduced matrices for display purposes.

In [203]:
DCM = DCM.join(LIB[['author','title']], on='book_id')
In [204]:
DCM['doc'] = DCM.apply(lambda x: "{}-{}-{}".format(x.author, x.title, x.name[1]), 1)
In [205]:
DCM.head().style.background_gradient()
Out[205]:
PC0 PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 author title doc
book_id chap_num
1 1 0.045586 -0.068268 0.021243 0.154930 -0.066606 -0.295966 0.092448 -0.045482 -0.000052 -0.043629 Poe The Works of Edgar Allan Poe Volume 1 (of 5) of the Raven Edition Poe-The Works of Edgar Allan Poe Volume 1 (of 5) of the Raven Edition-1
2 0.036468 -0.052018 0.007954 0.116133 -0.046357 -0.195441 0.050429 -0.027351 0.004047 -0.025503 Poe The Works of Edgar Allan Poe Volume 1 (of 5) of the Raven Edition Poe-The Works of Edgar Allan Poe Volume 1 (of 5) of the Raven Edition-2
3 0.031147 -0.054016 0.006790 0.103811 -0.048130 -0.174429 0.038767 -0.013583 -0.005808 -0.001074 Poe The Works of Edgar Allan Poe Volume 1 (of 5) of the Raven Edition Poe-The Works of Edgar Allan Poe Volume 1 (of 5) of the Raven Edition-3
4 0.038437 -0.061656 0.016392 0.134503 -0.061723 -0.256758 0.085044 -0.032434 0.002938 -0.054203 Poe The Works of Edgar Allan Poe Volume 1 (of 5) of the Raven Edition Poe-The Works of Edgar Allan Poe Volume 1 (of 5) of the Raven Edition-4
5 0.034846 -0.060136 0.017363 0.130076 -0.060269 -0.236491 0.065547 -0.026885 -0.000811 -0.043410 Poe The Works of Edgar Allan Poe Volume 1 (of 5) of the Raven Edition Poe-The Works of Edgar Allan Poe Volume 1 (of 5) of the Raven Edition-5

Visualize

In [206]:
def vis_pcs(M, a, b, label='author', prefix='PC'):
    fig = px.scatter(M, prefix + str(a), prefix + str(b), 
                        color=label, 
                        hover_name='doc', marginal_x='box')
    fig.show()

PC 0 and 1

In [207]:
vis_pcs(DCM, 0, 1)
In [208]:
vis_pcs(DCM, 0, 1, label='title')

PC 1 and 2

In [209]:
vis_pcs(DCM, 1, 2)
In [210]:
vis_pcs(DCM, 1, 2, label='title')

PC 2 and 3

In [211]:
vis_pcs(DCM, 2, 3)
In [212]:
vis_pcs(DCM, 2, 3, label='title')

PC 3 and 4

In [213]:
vis_pcs(DCM, 3, 4, label='author')
In [214]:
vis_pcs(DCM, 3, 4, label='title')

Compare to Scikit Learn

In [215]:
pca_engine = PCA(n_components=10)
In [216]:
DCM_sk = pd.DataFrame(pca_engine.fit_transform(TFIDF), index=TFIDF.index)
DCM_sk.columns = ['PC{}'.format(i) for i in DCM_sk.columns]
DCM_sk = DCM_sk.join(LIB[['author','title']], on='book_id')
DCM_sk['doc'] = DCM_sk.apply(lambda x: "{}-{}-{}".format(x.author, x.title, x.name[1]), 1)
In [217]:
DCM_sk.head().style.background_gradient()
Out[217]:
PC0 PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 author title doc
book_id chap_num
1 1 -0.045585 -0.068268 0.021243 0.154931 -0.066607 -0.295969 -0.092394 -0.046224 -0.001190 -0.049722 Poe The Works of Edgar Allan Poe Volume 1 (of 5) of the Raven Edition Poe-The Works of Edgar Allan Poe Volume 1 (of 5) of the Raven Edition-1
2 -0.036468 -0.052018 0.007954 0.116133 -0.046357 -0.195443 -0.050399 -0.027435 0.002127 -0.025961 Poe The Works of Edgar Allan Poe Volume 1 (of 5) of the Raven Edition Poe-The Works of Edgar Allan Poe Volume 1 (of 5) of the Raven Edition-2
3 -0.031147 -0.054016 0.006790 0.103810 -0.048132 -0.174428 -0.038739 -0.014089 -0.003805 -0.005026 Poe The Works of Edgar Allan Poe Volume 1 (of 5) of the Raven Edition Poe-The Works of Edgar Allan Poe Volume 1 (of 5) of the Raven Edition-3
4 -0.038438 -0.061656 0.016393 0.134507 -0.061720 -0.256770 -0.084901 -0.031977 -0.001425 -0.065950 Poe The Works of Edgar Allan Poe Volume 1 (of 5) of the Raven Edition Poe-The Works of Edgar Allan Poe Volume 1 (of 5) of the Raven Edition-4
5 -0.034846 -0.060136 0.017363 0.130080 -0.060267 -0.236501 -0.065406 -0.026532 -0.004682 -0.054766 Poe The Works of Edgar Allan Poe Volume 1 (of 5) of the Raven Edition Poe-The Works of Edgar Allan Poe Volume 1 (of 5) of the Raven Edition-5

PC 0 and 1

In [218]:
vis_pcs(DCM_sk, 0, 1)

PC 0 and 1

In [219]:
vis_pcs(DCM_sk, 0, 1)
In [222]:
# px.scatter_3d(DCM_sk, 'PC0', 'PC1','PC2', color='title', hover_name='doc', height=1000, width=1200)

Show Loadings

In [223]:
LOADINGS_sk = pd.DataFrame(pca_engine.components_.T * np.sqrt(pca_engine.explained_variance_))
LOADINGS_sk.columns = ["PC{}".format(i) for i in LOADINGS_sk.columns]
In [224]:
LOADINGS_sk.index = TFIDF.columns
LOADINGS_sk.index.name = 'term_id'
LOADINGS_sk['term_str'] = LOADINGS_sk.apply(lambda x: VOCAB.loc[int(x.name)].term_str, 1)
In [225]:
pc0_pos = LOADINGS_sk.sort_values('PC0', ascending=False).head(10).term_str.str.cat(sep=' ')
pc0_neg = LOADINGS_sk.sort_values('PC0', ascending=True).head(10).term_str.str.cat(sep=' ')
pc1_pos = LOADINGS_sk.sort_values('PC1', ascending=False).head(10).term_str.str.cat(sep=' ')
pc1_neg = LOADINGS_sk.sort_values('PC1', ascending=True).head(10).term_str.str.cat(sep=' ')
In [226]:
print('BOOKS PC0+', pc0_pos)
print('BOOKS PC0-', pc0_neg)
print('BOOKS PC1+', pc1_pos)
print('BOOKS PC1-', pc1_neg)
BOOKS PC0+ polly tom fanny maud fan shaw trix sydney grandma nt
BOOKS PC0- jo ben meg dan rose mac laurie beth phebe bab
BOOKS PC1+ jo meg laurie dan beth amy march nat bhaer demi
BOOKS PC1- ben bab celia thorny betty mac rose phebe sancho sanch

SAVE

In [227]:
DCM.to_csv('PCA_DCM_chaps.csv')
COMPS.to_csv('PCA_TCM_chaps.csv')
DCM.to_csv('PCA_DCM_books.csv')
COMPS.to_csv('PCA_TCM_books.csv')

Topic Models (LDA)

In [228]:
n_terms = 4000
n_topics = 30
# raf - do minimum of 20 
max_iter = 5
OHCO = ['book_id', 'chap_num', 'para_num']

Imports

In [229]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation as LDA
In [230]:
%matplotlib inline

Prepare Data

Convert TOKENS to table of paragraphs

Scikit Learn wants an F1 style corpus. We create onefrom our annotated TOKEN table, keeping only regular nouns.

In [231]:
TOKENS = pd.read_csv('TOKEN2.csv')
In [232]:
TOKENS.head()
Out[232]:
book_id chap_num para_num sent_num token_num pos_tuple pos token_str term_str term_id
0 2787 1 1 0 1 ('IT', 'NNP') NNP IT it 14145
1 2787 1 1 0 2 ("'S", 'POS') POS 'S s 22463
2 2787 1 1 0 3 ('time', 'NN') NN time time 26439
3 2787 1 1 0 4 ('to', 'TO') TO to to 26510
4 2787 1 1 0 5 ('go', 'VB') VB go go 11429
In [233]:
PARAS = TOKENS[TOKENS.pos.str.match(r'^NNS?$')]\
    .groupby(OHCO).term_str\
    .apply(lambda x: ' '.join(x))\
    .to_frame()\
    .rename(columns={'term_str':'para_str'})
In [234]:
PARAS.head()
Out[234]:
para_str
book_id chap_num para_num
1 1 1 accounts city state excitement phenomena natur...
2 day date crowd people purposes square city day...
3 origin bulk masses area space queer substance ...
4 meantime city object curiosity cause smoke min...
5 balloon doubt feet earth crowd view person occ...

Create Vector Space

We use Scikit Learn's CountVectorizer to convert our F1 corpus of paragraphs into a document-term vector space of word counts.

In [235]:
tfv = CountVectorizer(max_features=n_terms, stop_words='english')
tf = tfv.fit_transform(PARAS.para_str)
TERMS = tfv.get_feature_names()

Generate Model

We run Scikit Learn's LatentDirichletAllocation algorithm and extract the THETA and PHI tables.

In [236]:
lda = LDA(n_components=n_topics, max_iter=max_iter, learning_offset=50., random_state=0)

THETA

In [237]:
THETA = pd.DataFrame(lda.fit_transform(tf), index=PARAS.index)
THETA.columns.name = 'topic_id'
In [238]:
THETA.sample(20).style.background_gradient()
Out[238]:
topic_id 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
book_id chap_num para_num
2786 14 4 0.008333 0.008333 0.008333 0.008333 0.240102 0.008333 0.008333 0.008333 0.008333 0.008333 0.526564 0.008333 0.008333 0.008333 0.008333 0.008333 0.008333 0.008333 0.008333 0.008333 0.008333 0.008333 0.008333 0.008333 0.008333 0.008333 0.008333 0.008333 0.008333 0.008333
9 54 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.516667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667
2726 9 88 0.001667 0.001667 0.001667 0.001667 0.001667 0.001667 0.001667 0.001667 0.001667 0.001667 0.001667 0.001667 0.001667 0.001667 0.001667 0.001667 0.001667 0.001667 0.001667 0.001667 0.001667 0.001667 0.001667 0.001667 0.001667 0.001667 0.001667 0.001667 0.951667 0.001667
514 9 73 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.516667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667
21 91 0.211616 0.004167 0.004167 0.004167 0.004167 0.004167 0.544029 0.004167 0.004167 0.004167 0.004167 0.004167 0.004167 0.004167 0.004167 0.004167 0.004167 0.004167 0.004167 0.004167 0.004167 0.131855 0.004167 0.004167 0.004167 0.004167 0.004167 0.004167 0.004167 0.004167
2804 24 20 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.528560 0.003030 0.003030 0.003030 0.003030 0.200323 0.189299 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030
2786 21 40 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.677778 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111
514 25 29 0.002222 0.002222 0.002222 0.356931 0.580846 0.002222 0.002222 0.002222 0.002222 0.002222 0.002222 0.002222 0.002222 0.002222 0.002222 0.002222 0.002222 0.002222 0.002222 0.002222 0.002222 0.002222 0.002222 0.002222 0.002222 0.002222 0.002222 0.002222 0.002222 0.002222
2726 21 4 0.003333 0.003333 0.003333 0.003333 0.003333 0.003333 0.003333 0.003333 0.003333 0.003333 0.003333 0.685035 0.003333 0.003333 0.003333 0.003333 0.003333 0.003333 0.003333 0.003333 0.003333 0.003333 0.123445 0.101520 0.003333 0.003333 0.003333 0.003333 0.003333 0.003333
5 2 25 0.011111 0.011111 0.011111 0.011111 0.407977 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.280911 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111
2787 16 12 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.592953 0.006667 0.006667 0.006667 0.006667 0.220380 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667
5 1 3 0.001852 0.001852 0.001852 0.001852 0.001852 0.328877 0.001852 0.001852 0.001852 0.201613 0.001852 0.001852 0.419510 0.001852 0.001852 0.001852 0.001852 0.001852 0.001852 0.001852 0.001852 0.001852 0.001852 0.001852 0.001852 0.001852 0.001852 0.001852 0.001852 0.001852
3795 15 61 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.590653 0.006667 0.006667 0.006667 0.006667 0.222680 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667 0.006667
2726 15 83 0.341400 0.005556 0.005556 0.005556 0.005556 0.005556 0.005556 0.005556 0.005556 0.176786 0.005556 0.005556 0.005556 0.005556 0.331814 0.005556 0.005556 0.005556 0.005556 0.005556 0.005556 0.005556 0.005556 0.005556 0.005556 0.005556 0.005556 0.005556 0.005556 0.005556
18 59 0.004762 0.338861 0.004762 0.004762 0.004762 0.004762 0.004762 0.004762 0.004762 0.004762 0.004762 0.004762 0.004762 0.004762 0.527805 0.004762 0.004762 0.004762 0.004762 0.004762 0.004762 0.004762 0.004762 0.004762 0.004762 0.004762 0.004762 0.004762 0.004762 0.004762
1 2 9 0.003030 0.003030 0.666727 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.248424 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030 0.003030
2787 15 94 0.003704 0.156217 0.003704 0.003704 0.003704 0.003704 0.003704 0.003704 0.003704 0.003704 0.003704 0.003704 0.003704 0.003704 0.003704 0.003704 0.502373 0.003704 0.003704 0.241410 0.003704 0.003704 0.003704 0.003704 0.003704 0.003704 0.003704 0.003704 0.003704 0.003704
18 40 0.011111 0.011111 0.677778 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111 0.011111
2804 23 14 0.002564 0.002564 0.002564 0.450859 0.002564 0.002564 0.002564 0.002564 0.061532 0.002564 0.002564 0.002564 0.002564 0.002564 0.002564 0.002564 0.002564 0.002564 0.002564 0.002564 0.002564 0.418378 0.002564 0.002564 0.002564 0.002564 0.002564 0.002564 0.002564 0.002564
3795 8 34 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.516667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667 0.016667

PHI

In [239]:
PHI = pd.DataFrame(lda.components_, columns=TERMS)
PHI.index.name = 'topic_id'
PHI.columns.name  = 'term_str'
In [240]:
PHI.T.head().style.background_gradient()
Out[240]:
topic_id 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
term_str
aback 1.033333 0.033333 0.251742 0.033333 0.033333 0.033333 0.814925 0.033333 0.033333 0.033333 0.033333 0.033333 0.033333 0.033333 0.033333 0.033333 0.033333 0.033333 4.033333 1.033333 2.033333 0.033333 0.033333 0.033333 0.033333 0.033333 2.033333 0.033333 0.033333 0.033333
ability 2.033287 0.093171 4.890068 0.033333 0.033333 3.265507 0.033333 1.859361 0.033333 1.008706 0.033333 1.033333 0.354354 1.633178 0.033333 0.033334 0.033333 0.033333 0.033333 2.802126 0.104133 0.033333 0.033333 0.033333 0.033333 5.322708 0.033399 0.033334 0.033333 0.033333
absence 2.112087 9.632519 3.307857 0.033333 1.237051 6.487703 4.407342 0.639597 2.732606 5.765650 1.890409 0.033333 1.128253 0.033333 0.194323 0.424132 0.794770 0.033333 2.843909 0.039627 4.754014 0.446947 0.033333 0.033333 0.033333 2.324727 0.459613 0.620334 10.201389 3.321809
absent 1.033333 0.033333 1.512178 0.033333 0.033333 0.033333 0.033333 0.033333 0.033333 0.033333 0.033333 0.033333 0.033333 0.033333 0.033333 0.033333 0.033333 0.033333 0.033333 0.033333 0.909590 2.008445 0.033333 0.579377 0.033333 0.033333 0.033333 1.157076 0.033333 4.033333
absorbed 0.033333 0.033333 0.033333 0.033333 2.026674 0.033333 0.033333 0.033333 0.033333 0.033333 0.033333 0.767203 0.033333 0.033333 0.033333 1.211021 0.033333 0.033333 0.033333 0.033333 0.033333 2.034619 0.033333 1.033333 0.033333 0.033333 0.033333 0.033333 0.033333 1.127149

Inspect Results

Get Top Terms per Topic

In [241]:
TOPICS = PHI.stack().to_frame().rename(columns={0:'weight'})\
    .groupby('topic_id')\
    .apply(lambda x: 
           x.weight.sort_values(ascending=False)\
               .head(10)\
               .reset_index()\
               .drop('topic_id',1)\
               .term_str)
In [242]:
TOPICS
Out[242]:
term_str 0 1 2 3 4 5 6 7 8 9
topic_id
0 chance idea corner dog thing things face smile eye fun
1 life word mind letter letters flowers boy friends face things
2 uncle time piece manner hand board fact length box thing
3 arm breath course window round home head eyes queer face
4 eyes face hand look heart hands voice words tender kind
5 eyes question man head happiness life sake face cause love
6 way heart life music time pleasure sort world day father
7 head laughter face thing boys work places effect girls steps
8 man tone fellow head book words minute business eye look
9 beauty days scene man style roof feet articles object girls
10 sir air laugh turn joke answer lady time song people
11 heart face thank thing boy hands girls man satisfaction troubles
12 day time thing child money girl spite boy girls eyes
13 dear trouble time people duty game wits things order home
14 boys boy girls children time ladies things water boat end
15 surprise week way love child hurry course day everybody heart
16 hair dozen times news information face clothes thing sound door
17 death nature idea length feelings reason fancy glance time horror
18 wall reply time hill till bird eyes air stage legs
19 thing feet earth time balloon miles surface world man distance
20 work tm works terms money people things maam agreement ways
21 room things ones chair day house brother door way boys
22 story year place school home boys house children time years
23 mother home father morning way girls bread glad country folks
24 things dress hand paper silk king gloves leaves sort child
25 matter man things home boys law parlor ha time cousin
26 door box moon foot doors room gold drawer portion walls
27 girl boys night thing way lady time pardon mother nod
28 voice body hour day night corpse river opinion words man
29 time bit man head gentleman word lot love heart right
In [243]:
TOPICS['label'] = TOPICS.apply(lambda x: str(x.name) + ' ' + ' '.join(x), 1)

Sort Topics by Doc Weight

In [244]:
TOPICS['doc_weight_sum'] = THETA.sum()
In [245]:
TOPICS.sort_values('doc_weight_sum', ascending=True).plot.barh(y='doc_weight_sum', x='label', figsize=(5,10)) 
Out[245]:
<matplotlib.axes._subplots.AxesSubplot at 0x24709068278>

Explore Topics by Author

In [246]:
LIB = pd.read_csv('LIB3.csv').set_index('book_id')
In [247]:
topic_cols = [t for t in range(n_topics)]
AUTHORS = THETA.join(LIB, on='book_id')\
    .reset_index().set_index(['author']+OHCO)\
    .groupby('author')[topic_cols].mean()\
    .T                                            
AUTHORS.index.name = 'topic_id'
In [248]:
AUTHORS['topterms'] = TOPICS[[i for i in range(10)]].apply(lambda x: ' '.join(x), 1)
In [249]:
AUTHORS.sort_values('Alcott', ascending=False).style.background_gradient()
Out[249]:
author Alcott Poe topterms
topic_id
4 0.058236 0.026527 eyes face hand look heart hands voice words tender kind
14 0.051595 0.019103 boys boy girls children time ladies things water boat end
21 0.044982 0.016925 room things ones chair day house brother door way boys
1 0.044380 0.027477 life word mind letter letters flowers boy friends face things
23 0.043846 0.017903 mother home father morning way girls bread glad country folks
12 0.043372 0.021972 day time thing child money girl spite boy girls eyes
6 0.042088 0.025352 way heart life music time pleasure sort world day father
24 0.041513 0.024015 things dress hand paper silk king gloves leaves sort child
0 0.037182 0.023251 chance idea corner dog thing things face smile eye fun
27 0.036441 0.020809 girl boys night thing way lady time pardon mother nod
22 0.036102 0.024793 story year place school home boys house children time years
8 0.036012 0.048340 man tone fellow head book words minute business eye look
29 0.035843 0.035062 time bit man head gentleman word lot love heart right
10 0.033345 0.030228 sir air laugh turn joke answer lady time song people
13 0.032951 0.021045 dear trouble time people duty game wits things order home
16 0.032884 0.019800 hair dozen times news information face clothes thing sound door
3 0.032830 0.022840 arm breath course window round home head eyes queer face
15 0.032391 0.029738 surprise week way love child hurry course day everybody heart
20 0.032214 0.023679 work tm works terms money people things maam agreement ways
11 0.032179 0.024309 heart face thank thing boy hands girls man satisfaction troubles
18 0.029091 0.032364 wall reply time hill till bird eyes air stage legs
7 0.029053 0.023526 head laughter face thing boys work places effect girls steps
25 0.023863 0.040609 matter man things home boys law parlor ha time cousin
19 0.021997 0.056067 thing feet earth time balloon miles surface world man distance
26 0.021389 0.042705 door box moon foot doors room gold drawer portion walls
28 0.020691 0.071338 voice body hour day night corpse river opinion words man
5 0.019059 0.056162 eyes question man head happiness life sake face cause love
2 0.018633 0.065651 uncle time piece manner hand board fact length box thing
17 0.018551 0.061727 death nature idea length feelings reason fancy glance time horror
9 0.017287 0.046684 beauty days scene man style roof feet articles object girls
In [251]:
AUTHORS.sort_values('Poe', ascending=False).style.background_gradient()
Out[251]:
author Alcott Poe topterms
topic_id
28 0.020691 0.071338 voice body hour day night corpse river opinion words man
2 0.018633 0.065651 uncle time piece manner hand board fact length box thing
17 0.018551 0.061727 death nature idea length feelings reason fancy glance time horror
5 0.019059 0.056162 eyes question man head happiness life sake face cause love
19 0.021997 0.056067 thing feet earth time balloon miles surface world man distance
8 0.036012 0.048340 man tone fellow head book words minute business eye look
9 0.017287 0.046684 beauty days scene man style roof feet articles object girls
26 0.021389 0.042705 door box moon foot doors room gold drawer portion walls
25 0.023863 0.040609 matter man things home boys law parlor ha time cousin
29 0.035843 0.035062 time bit man head gentleman word lot love heart right
18 0.029091 0.032364 wall reply time hill till bird eyes air stage legs
10 0.033345 0.030228 sir air laugh turn joke answer lady time song people
15 0.032391 0.029738 surprise week way love child hurry course day everybody heart
1 0.044380 0.027477 life word mind letter letters flowers boy friends face things
4 0.058236 0.026527 eyes face hand look heart hands voice words tender kind
6 0.042088 0.025352 way heart life music time pleasure sort world day father
22 0.036102 0.024793 story year place school home boys house children time years
11 0.032179 0.024309 heart face thank thing boy hands girls man satisfaction troubles
24 0.041513 0.024015 things dress hand paper silk king gloves leaves sort child
20 0.032214 0.023679 work tm works terms money people things maam agreement ways
7 0.029053 0.023526 head laughter face thing boys work places effect girls steps
0 0.037182 0.023251 chance idea corner dog thing things face smile eye fun
3 0.032830 0.022840 arm breath course window round home head eyes queer face
12 0.043372 0.021972 day time thing child money girl spite boy girls eyes
13 0.032951 0.021045 dear trouble time people duty game wits things order home
27 0.036441 0.020809 girl boys night thing way lady time pardon mother nod
16 0.032884 0.019800 hair dozen times news information face clothes thing sound door
14 0.051595 0.019103 boys boy girls children time ladies things water boat end
23 0.043846 0.017903 mother home father morning way girls bread glad country folks
21 0.044982 0.016925 room things ones chair day house brother door way boys
In [252]:
import plotly_express as px
In [253]:
# px.scatter(AUTHORS.reset_index(), 'Alcott', 'melville', hover_name='topterms', text='topic_id')\
#     .update_traces(mode='text')

Clutser Topics

In [254]:
import scipy.cluster.hierarchy as sch
from scipy.spatial.distance import pdist
from sklearn.cluster import AgglomerativeClustering
from sklearn.preprocessing import normalize
import matplotlib.pyplot as plt
In [255]:
def plot_tree(tree, labels):
    plt.figure()
    fig, axes = plt.subplots(figsize=(5, 10))
    dendrogram = sch.dendrogram(tree, labels=labels, orientation="left")
    plt.tick_params(axis='both', which='major', labelsize=14)
In [256]:
SIMS = pdist(normalize(PHI), metric='euclidean')
TREE = sch.linkage(SIMS, method='ward')
In [257]:
labels  = ["{}: {}".format(a,b) for a, b in zip(AUTHORS.index,  AUTHORS.topterms.tolist())]
In [258]:
plot_tree(TREE, labels)
<Figure size 432x288 with 0 Axes>

LDA Viz

In [259]:
import pyLDAvis
import pyLDAvis.sklearn
pyLDAvis.enable_notebook()
In [261]:
pyLDAvis.sklearn.prepare(lda, tf, tfv)
Out[261]:

Word Embeddings (word2vec)

In [262]:
OHCO = ['book_id', 'chap_num', 'para_num', 'sent_num', 'token_num']
BAG = OHCO[:4] # Paragraphs
# BAG = OHCO[:5] # Sentences
window = 5

Imports

In [263]:
from gensim.models import word2vec
from sklearn.manifold import TSNE
import plotly_express as px
C:\ProgramData\Anaconda3\lib\site-packages\gensim\corpora\dictionary.py:11: DeprecationWarning:

Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working

C:\ProgramData\Anaconda3\lib\site-packages\scipy\sparse\sparsetools.py:21: DeprecationWarning:

`scipy.sparse.sparsetools` is deprecated!
scipy.sparse.sparsetools is a private module for scipy.sparse, and should not be used.

In [264]:
%matplotlib inline

Process

Import TOKENS and convert to a corpus for Gensim

We import data from the TOKEN table of the novels corpus, excluding proper nouns.

In [265]:
TOKENS = pd.read_csv('TOKEN2.csv').set_index(OHCO)
In [266]:
corpus = TOKENS[~TOKENS.pos.str.match('NNPS?')]\
    .groupby(BAG)\
    .term_str.apply(lambda  x:  x.tolist())\
    .reset_index()['term_str'].tolist()
In [267]:
corpus
Out[267]:
[['the'],
 ['late',
  'accounts',
  'from',
  'that',
  'city',
  'seems',
  'to',
  'be',
  'in',
  'a',
  'high',
  'state',
  'of',
  'philosophical',
  'excitement'],
 ['indeed',
  'phenomena',
  'have',
  'there',
  'occurred',
  'of',
  'a',
  'nature',
  'so',
  'completely',
  'unexpected',
  'so',
  'entirely',
  'novel',
  'so',
  'utterly',
  'at',
  'variance',
  'with',
  'preconceived',
  'opinions',
  'as',
  'to',
  'leave',
  'no',
  'doubt',
  'on',
  'my',
  'mind',
  'that',
  'long',
  'ere',
  'this',
  'all',
  'is',
  'in',
  'an',
  'uproar',
  'all',
  'physics',
  'in',
  'a',
  'ferment',
  'all',
  'reason',
  'and',
  'astronomy',
  'together',
  'by',
  'the',
  'ears'],
 ['it',
  'appears',
  'that',
  'on',
  'the',
  'day',
  'of',
  'i',
  'am',
  'not',
  'positive',
  'about',
  'the',
  'date',
  'a',
  'vast',
  'crowd',
  'of',
  'people',
  'for',
  'purposes',
  'not',
  'specifically',
  'mentioned',
  'were',
  'assembled',
  'in',
  'the',
  'great',
  'square',
  'of',
  'the',
  'in',
  'the',
  'wellconditioned',
  'city',
  'of'],
 ['the',
  'day',
  'was',
  'warm',
  'unusually',
  'so',
  'for',
  'the',
  'season',
  'there',
  'was',
  'hardly',
  'a',
  'breath',
  'of',
  'air',
  'stirring',
  'and',
  'the',
  'multitude',
  'were',
  'in',
  'no',
  'bad',
  'humor',
  'at',
  'being',
  'now',
  'and',
  'then',
  'besprinkled',
  'with',
  'friendly',
  'showers',
  'of',
  'momentary',
  'duration',
  'that',
  'fell',
  'from',
  'large',
  'white',
  'masses',
  'of',
  'cloud',
  'which',
  'chequered',
  'in',
  'a',
  'fitful',
  'manner',
  'the',
  'blue',
  'vault',
  'of',
  'the',
  'firmament'],
 ['nevertheless',
  'about',
  'noon',
  'a',
  'slight',
  'but',
  'remarkable',
  'agitation',
  'became',
  'apparent',
  'in',
  'the',
  'assembly',
  'the',
  'clattering',
  'of',
  'ten',
  'thousand',
  'tongues',
  'succeeded',
  'and',
  'in',
  'an',
  'instant',
  'afterward',
  'ten',
  'thousand',
  'faces',
  'were',
  'upturned',
  'toward',
  'the',
  'heavens',
  'ten',
  'thousand',
  'pipes',
  'descended',
  'simultaneously',
  'from',
  'the',
  'corners',
  'of',
  'ten',
  'thousand',
  'mouths',
  'and',
  'a',
  'shout',
  'which',
  'could',
  'be',
  'compared',
  'to',
  'nothing',
  'but',
  'the',
  'roaring',
  'of',
  'resounded',
  'long',
  'loudly',
  'and',
  'furiously',
  'through',
  'all',
  'the',
  'environs',
  'of'],
 ['the',
  'origin',
  'of',
  'this',
  'hubbub',
  'soon',
  'became',
  'sufficiently',
  'evident'],
 ['from',
  'behind',
  'the',
  'huge',
  'bulk',
  'of',
  'one',
  'of',
  'those',
  'sharplydefined',
  'masses',
  'of',
  'cloud',
  'already',
  'mentioned',
  'was',
  'seen',
  'slowly',
  'to',
  'emerge',
  'into',
  'an',
  'open',
  'area',
  'of',
  'blue',
  'space',
  'a',
  'queer',
  'heterogeneous',
  'but',
  'apparently',
  'solid',
  'substance',
  'so',
  'oddly',
  'shaped',
  'so',
  'whimsically',
  'put',
  'together',
  'as',
  'not',
  'to',
  'be',
  'in',
  'any',
  'manner',
  'comprehended',
  'and',
  'never',
  'to',
  'be',
  'sufficiently',
  'admired',
  'by',
  'the',
  'host',
  'of',
  'sturdy',
  'burghers',
  'who',
  'stood',
  'openmouthed',
  'below'],
 ['what', 'could', 'it', 'be'],
 ['in',
  'the',
  'name',
  'of',
  'all',
  'the',
  'vrows',
  'and',
  'devils',
  'in',
  'what',
  'could',
  'it',
  'possibly',
  'portend'],
 ['no',
  'one',
  'knew',
  'no',
  'one',
  'could',
  'imagine',
  'no',
  'one',
  'not',
  'even',
  'the',
  'burgomaster',
  'had',
  'the',
  'slightest',
  'clew',
  'by',
  'which',
  'to',
  'unravel',
  'the',
  'mystery',
  'so',
  'as',
  'nothing',
  'more',
  'reasonable',
  'could',
  'be',
  'done',
  'every',
  'one',
  'to',
  'a',
  'man',
  'replaced',
  'his',
  'pipe',
  'carefully',
  'in',
  'the',
  'corner',
  'of',
  'his',
  'mouth',
  'and',
  'cocking',
  'up',
  'his',
  'right',
  'eye',
  'towards',
  'the',
  'phenomenon',
  'puffed',
  'paused',
  'waddled',
  'about',
  'and',
  'grunted',
  'significantly',
  'then',
  'waddled',
  'back',
  'grunted',
  'paused',
  'and',
  'finally',
  'puffed',
  'again'],
 ['in',
  'the',
  'meantime',
  'however',
  'lower',
  'and',
  'still',
  'lower',
  'toward',
  'the',
  'goodly',
  'city',
  'came',
  'the',
  'object',
  'of',
  'so',
  'much',
  'curiosity',
  'and',
  'the',
  'cause',
  'of',
  'so',
  'much',
  'smoke'],
 ['in',
  'a',
  'very',
  'few',
  'minutes',
  'it',
  'arrived',
  'near',
  'enough',
  'to',
  'be',
  'accurately',
  'discerned'],
 ['it', 'appeared', 'to', 'be', 'yes'],
 ['it',
  'was',
  'undoubtedly',
  'a',
  'species',
  'of',
  'balloon',
  'but',
  'surely',
  'no',
  'such',
  'balloon',
  'had',
  'ever',
  'been',
  'seen',
  'in',
  'before'],
 ['for',
  'who',
  'let',
  'me',
  'ask',
  'ever',
  'heard',
  'of',
  'a',
  'balloon',
  'manufactured',
  'entirely',
  'of',
  'dirty',
  'newspapers'],
 ['no',
  'man',
  'in',
  'certainly',
  'yet',
  'here',
  'under',
  'the',
  'very',
  'noses',
  'of',
  'the',
  'people',
  'or',
  'rather',
  'at',
  'some',
  'distance',
  'above',
  'their',
  'noses',
  'was',
  'the',
  'identical',
  'thing',
  'in',
  'question',
  'and',
  'composed',
  'i',
  'have',
  'it',
  'on',
  'the',
  'best',
  'authority',
  'of',
  'the',
  'precise',
  'material',
  'which',
  'no',
  'one',
  'had',
  'ever',
  'before',
  'known',
  'to',
  'be',
  'used',
  'for',
  'a',
  'similar',
  'purpose'],
 ['it',
  'was',
  'an',
  'egregious',
  'insult',
  'to',
  'the',
  'good',
  'sense',
  'of',
  'the',
  'burghers',
  'of'],
 ['as',
  'to',
  'the',
  'shape',
  'of',
  'the',
  'phenomenon',
  'it',
  'was',
  'even',
  'still',
  'more',
  'reprehensible'],
 ['being',
  'little',
  'or',
  'nothing',
  'better',
  'than',
  'a',
  'huge',
  'foolscap',
  'turned',
  'upside',
  'down'],
 ['and',
  'this',
  'similitude',
  'was',
  'regarded',
  'as',
  'by',
  'no',
  'means',
  'lessened',
  'when',
  'upon',
  'nearer',
  'inspection',
  'there',
  'was',
  'perceived',
  'a',
  'large',
  'tassel',
  'depending',
  'from',
  'its',
  'apex',
  'and',
  'around',
  'the',
  'upper',
  'rim',
  'or',
  'base',
  'of',
  'the',
  'cone',
  'a',
  'circle',
  'of',
  'little',
  'instruments',
  'resembling',
  'sheepbells',
  'which',
  'kept',
  'up',
  'a',
  'continual',
  'tinkling',
  'to',
  'the',
  'tune',
  'of'],
 ['but', 'still', 'worse'],
 ['suspended',
  'by',
  'blue',
  'ribbons',
  'to',
  'the',
  'end',
  'of',
  'this',
  'fantastic',
  'machine',
  'there',
  'hung',
  'by',
  'way',
  'of',
  'car',
  'an',
  'enormous',
  'drab',
  'beaver',
  'hat',
  'with',
  'a',
  'brim',
  'superlatively',
  'broad',
  'and',
  'a',
  'hemispherical',
  'crown',
  'with',
  'a',
  'black',
  'band',
  'and',
  'a',
  'silver',
  'buckle'],
 ['it',
  'is',
  'however',
  'somewhat',
  'remarkable',
  'that',
  'many',
  'citizens',
  'of',
  'swore',
  'to',
  'having',
  'seen',
  'the',
  'same',
  'hat',
  'repeatedly',
  'before',
  'and',
  'indeed',
  'the',
  'whole',
  'assembly',
  'seemed',
  'to',
  'regard',
  'it',
  'with',
  'eyes',
  'of',
  'familiarity',
  'while',
  'the',
  'vrow',
  'upon',
  'sight',
  'of',
  'it',
  'uttered',
  'an',
  'exclamation',
  'of',
  'joyful',
  'surprise',
  'and',
  'declared',
  'it',
  'to',
  'be',
  'the',
  'identical',
  'hat',
  'of',
  'her',
  'good',
  'man',
  'himself'],
 ['now',
  'this',
  'was',
  'a',
  'circumstance',
  'the',
  'more',
  'to',
  'be',
  'observed',
  'as',
  'with',
  'three',
  'companions',
  'had',
  'actually',
  'disappeared',
  'from',
  'about',
  'five',
  'years',
  'before',
  'in',
  'a',
  'very',
  'sudden',
  'and',
  'unaccountable',
  'manner',
  'and',
  'up',
  'to',
  'the',
  'date',
  'of',
  'this',
  'narrative',
  'all',
  'attempts',
  'had',
  'failed',
  'of',
  'obtaining',
  'any',
  'intelligence',
  'concerning',
  'them',
  'whatsoever'],
 ['to',
  'be',
  'sure',
  'some',
  'bones',
  'which',
  'were',
  'thought',
  'to',
  'be',
  'human',
  'mixed',
  'up',
  'with',
  'a',
  'quantity',
  'of',
  'oddlooking',
  'rubbish',
  'had',
  'been',
  'lately',
  'discovered',
  'in',
  'a',
  'retired',
  'situation',
  'to',
  'the',
  'east',
  'of',
  'and',
  'some',
  'people',
  'went',
  'so',
  'far',
  'as',
  'to',
  'imagine',
  'that',
  'in',
  'this',
  'spot',
  'a',
  'foul',
  'murder',
  'had',
  'been',
  'committed',
  'and',
  'that',
  'the',
  'sufferers',
  'were',
  'in',
  'all',
  'probability',
  'and',
  'his',
  'associates'],
 ['but', 'to', 'return'],
 ['the',
  'balloon',
  'for',
  'such',
  'no',
  'doubt',
  'it',
  'was',
  'had',
  'now',
  'descended',
  'to',
  'within',
  'a',
  'hundred',
  'feet',
  'of',
  'the',
  'earth',
  'allowing',
  'the',
  'crowd',
  'below',
  'a',
  'sufficiently',
  'distinct',
  'view',
  'of',
  'the',
  'person',
  'of',
  'its',
  'occupant'],
 ['this', 'was', 'in', 'truth', 'a', 'very', 'droll', 'little', 'somebody'],
 ['he',
  'could',
  'not',
  'have',
  'been',
  'more',
  'than',
  'two',
  'feet',
  'in',
  'height',
  'but',
  'this',
  'altitude',
  'little',
  'as',
  'it',
  'was',
  'would',
  'have',
  'been',
  'sufficient',
  'to',
  'destroy',
  'his',
  'equilibrium',
  'and',
  'tilt',
  'him',
  'over',
  'the',
  'edge',
  'of',
  'his',
  'tiny',
  'car',
  'but',
  'for',
  'the',
  'intervention',
  'of',
  'a',
  'circular',
  'rim',
  'reaching',
  'as',
  'high',
  'as',
  'the',
  'breast',
  'and',
  'rigged',
  'on',
  'to',
  'the',
  'cords',
  'of',
  'the',
  'balloon'],
 ['the',
  'body',
  'of',
  'the',
  'little',
  'man',
  'was',
  'more',
  'than',
  'proportionately',
  'broad',
  'giving',
  'to',
  'his',
  'entire',
  'figure',
  'a',
  'rotundity',
  'highly',
  'absurd'],
 ['his',
  'feet',
  'of',
  'course',
  'could',
  'not',
  'be',
  'seen',
  'at',
  'all',
  'although',
  'a',
  'horny',
  'substance',
  'of',
  'suspicious',
  'nature',
  'was',
  'occasionally',
  'protruded',
  'through',
  'a',
  'rent',
  'in',
  'the',
  'bottom',
  'of',
  'the',
  'car',
  'or',
  'to',
  'speak',
  'more',
  'properly',
  'in',
  'the',
  'top',
  'of',
  'the',
  'hat'],
 ['his', 'hands', 'were', 'enormously', 'large'],
 ['his',
  'hair',
  'was',
  'extremely',
  'gray',
  'and',
  'collected',
  'in',
  'a',
  'cue',
  'behind'],
 ['his',
  'nose',
  'was',
  'prodigiously',
  'long',
  'crooked',
  'and',
  'inflammatory',
  'his',
  'eyes',
  'full',
  'brilliant',
  'and',
  'acute',
  'his',
  'chin',
  'and',
  'cheeks',
  'although',
  'wrinkled',
  'with',
  'age',
  'were',
  'broad',
  'puffy',
  'and',
  'double',
  'but',
  'of',
  'ears',
  'of',
  'any',
  'kind',
  'or',
  'character',
  'there',
  'was',
  'not',
  'a',
  'semblance',
  'to',
  'be',
  'discovered',
  'upon',
  'any',
  'portion',
  'of',
  'his',
  'head'],
 ['this',
  'odd',
  'little',
  'gentleman',
  'was',
  'dressed',
  'in',
  'a',
  'loose',
  'surtout',
  'of',
  'skyblue',
  'satin',
  'with',
  'tight',
  'breeches',
  'to',
  'match',
  'fastened',
  'with',
  'silver',
  'buckles',
  'at',
  'the',
  'knees'],
 ['his',
  'vest',
  'was',
  'of',
  'some',
  'bright',
  'yellow',
  'material',
  'a',
  'white',
  'taffety',
  'cap',
  'was',
  'set',
  'jauntily',
  'on',
  'one',
  'side',
  'of',
  'his',
  'head',
  'and',
  'to',
  'complete',
  'his',
  'equipment',
  'a',
  'bloodred',
  'silk',
  'handkerchief',
  'enveloped',
  'his',
  'throat',
  'and',
  'fell',
  'down',
  'in',
  'a',
  'dainty',
  'manner',
  'upon',
  'his',
  'bosom',
  'in',
  'a',
  'fantastic',
  'bowknot',
  'of',
  'supereminent',
  'dimensions'],
 ['having',
  'descended',
  'as',
  'i',
  'said',
  'before',
  'to',
  'about',
  'one',
  'hundred',
  'feet',
  'from',
  'the',
  'surface',
  'of',
  'the',
  'earth',
  'the',
  'little',
  'old',
  'gentleman',
  'was',
  'suddenly',
  'seized',
  'with',
  'a',
  'fit',
  'of',
  'trepidation',
  'and',
  'appeared',
  'disinclined',
  'to',
  'make',
  'any',
  'nearer',
  'approach',
  'to',
  'terra',
  'firma'],
 ['throwing',
  'out',
  'therefore',
  'a',
  'quantity',
  'of',
  'sand',
  'from',
  'a',
  'canvas',
  'bag',
  'which',
  'he',
  'lifted',
  'with',
  'great',
  'difficulty',
  'he',
  'became',
  'stationary',
  'in',
  'an',
  'instant'],
 ['he',
  'then',
  'proceeded',
  'in',
  'a',
  'hurried',
  'and',
  'agitated',
  'manner',
  'to',
  'extract',
  'from',
  'a',
  'sidepocket',
  'in',
  'his',
  'surtout',
  'a',
  'large',
  'morocco',
  'pocketbook'],
 ['this',
  'he',
  'poised',
  'suspiciously',
  'in',
  'his',
  'hand',
  'then',
  'eyed',
  'it',
  'with',
  'an',
  'air',
  'of',
  'extreme',
  'surprise',
  'and',
  'was',
  'evidently',
  'astonished',
  'at',
  'its',
  'weight'],
 ['he',
  'at',
  'length',
  'opened',
  'it',
  'and',
  'drawing',
  'there',
  'from',
  'a',
  'huge',
  'letter',
  'sealed',
  'with',
  'red',
  'sealingwax',
  'and',
  'tied',
  'carefully',
  'with',
  'red',
  'tape',
  'let',
  'it',
  'fall',
  'precisely',
  'at',
  'the',
  'feet',
  'of',
  'the',
  'burgomaster'],
 ['his', 'excellency', 'stooped', 'to', 'take', 'it', 'up'],
 ['but',
  'the',
  'aeronaut',
  'still',
  'greatly',
  'discomposed',
  'and',
  'having',
  'apparently',
  'no',
  'farther',
  'business',
  'to',
  'detain',
  'him',
  'in',
  'began',
  'at',
  'this',
  'moment',
  'to',
  'make',
  'busy',
  'preparations',
  'for',
  'departure',
  'and',
  'it',
  'being',
  'necessary',
  'to',
  'discharge',
  'a',
  'portion',
  'of',
  'ballast',
  'to',
  'enable',
  'him',
  'to',
  'reascend',
  'the',
  'half',
  'dozen',
  'bags',
  'which',
  'he',
  'threw',
  'out',
  'one',
  'after',
  'another',
  'without',
  'taking',
  'the',
  'trouble',
  'to',
  'empty',
  'their',
  'contents',
  'tumbled',
  'every',
  'one',
  'of',
  'them',
  'most',
  'unfortunately',
  'upon',
  'the',
  'back',
  'of',
  'the',
  'burgomaster',
  'and',
  'rolled',
  'him',
  'over',
  'and',
  'over',
  'no',
  'less',
  'than',
  'oneandtwenty',
  'times',
  'in',
  'the',
  'face',
  'of',
  'every',
  'man',
  'in'],
 ['it',
  'is',
  'not',
  'to',
  'be',
  'supposed',
  'however',
  'that',
  'the',
  'great',
  'suffered',
  'this',
  'impertinence',
  'on',
  'the',
  'part',
  'of',
  'the',
  'little',
  'old',
  'man',
  'to',
  'pass',
  'off',
  'with',
  'impunity'],
 ['it',
  'is',
  'said',
  'on',
  'the',
  'contrary',
  'that',
  'during',
  'each',
  'and',
  'every',
  'one',
  'of',
  'his',
  'oneand',
  'twenty',
  'circumvolutions',
  'he',
  'emitted',
  'no',
  'less',
  'than',
  'oneandtwenty',
  'distinct',
  'and',
  'furious',
  'whiffs',
  'from',
  'his',
  'pipe',
  'to',
  'which',
  'he',
  'held',
  'fast',
  'the',
  'whole',
  'time',
  'with',
  'all',
  'his',
  'might',
  'and',
  'to',
  'which',
  'he',
  'intends',
  'holding',
  'fast',
  'until',
  'the',
  'day',
  'of',
  'his',
  'death'],
 ['in',
  'the',
  'meantime',
  'the',
  'balloon',
  'arose',
  'like',
  'a',
  'lark',
  'and',
  'soaring',
  'far',
  'away',
  'above',
  'the',
  'city',
  'at',
  'length',
  'drifted',
  'quietly',
  'behind',
  'a',
  'cloud',
  'similar',
  'to',
  'that',
  'from',
  'which',
  'it',
  'had',
  'so',
  'oddly',
  'emerged',
  'and',
  'was',
  'thus',
  'lost',
  'forever',
  'to',
  'the',
  'wondering',
  'eyes',
  'of',
  'the',
  'good',
  'citizens',
  'of'],
 ['all',
  'attention',
  'was',
  'now',
  'directed',
  'to',
  'the',
  'letter',
  'the',
  'descent',
  'of',
  'which',
  'and',
  'the',
  'consequences',
  'attending',
  'thereupon',
  'had',
  'proved',
  'so',
  'fatally',
  'subversive',
  'of',
  'both',
  'person',
  'and',
  'personal',
  'dignity',
  'to',
  'his',
  'the',
  'illustrious'],
 ['that',
  'functionary',
  'however',
  'had',
  'not',
  'failed',
  'during',
  'his',
  'circumgyratory',
  'movements',
  'to',
  'bestow',
  'a',
  'thought',
  'upon',
  'the',
  'important',
  'subject',
  'of',
  'securing',
  'the',
  'packet',
  'in',
  'question',
  'which',
  'was',
  'seen',
  'upon',
  'inspection',
  'to',
  'have',
  'fallen',
  'into',
  'the',
  'most',
  'proper',
  'hands',
  'being',
  'actually',
  'addressed',
  'to',
  'himself',
  'and',
  'in',
  'their',
  'official',
  'capacities',
  'of',
  'and',
  'of',
  'the',
  'of'],
 ['it',
  'was',
  'accordingly',
  'opened',
  'by',
  'those',
  'dignitaries',
  'upon',
  'the',
  'spot',
  'and',
  'found',
  'to',
  'contain',
  'the',
  'following',
  'extraordinary',
  'and',
  'indeed',
  'very',
  'serious',
  'communications'],
 ['to',
  'their',
  'excellencies',
  'and',
  'and',
  'of',
  'the',
  'of',
  'in',
  'the',
  'city',
  'of'],
 ['your',
  'excellencies',
  'may',
  'perhaps',
  'be',
  'able',
  'to',
  'remember',
  'an',
  'humble',
  'artizan',
  'by',
  'name',
  'and',
  'by',
  'occupation',
  'a',
  'mender',
  'of',
  'bellows',
  'who',
  'with',
  'three',
  'others',
  'disappeared',
  'from',
  'about',
  'five',
  'years',
  'ago',
  'in',
  'a',
  'manner',
  'which',
  'must',
  'have',
  'been',
  'considered',
  'by',
  'all',
  'parties',
  'at',
  'once',
  'sudden',
  'and',
  'extremely',
  'unaccountable'],
 ['if',
  'however',
  'it',
  'so',
  'please',
  'your',
  'excellencies',
  'i',
  'the',
  'writer',
  'of',
  'this',
  'communication',
  'am',
  'the',
  'identical',
  'himself'],
 ['it',
  'is',
  'well',
  'known',
  'to',
  'most',
  'of',
  'my',
  'fellow',
  'citizens',
  'that',
  'for',
  'the',
  'period',
  'of',
  'forty',
  'years',
  'i',
  'continued',
  'to',
  'occupy',
  'the',
  'little',
  'square',
  'brick',
  'building',
  'at',
  'the',
  'head',
  'of',
  'the',
  'alley',
  'called',
  'in',
  'which',
  'i',
  'resided',
  'at',
  'the',
  'time',
  'of',
  'my',
  'disappearance'],
 ['my',
  'ancestors',
  'have',
  'also',
  'resided',
  'therein',
  'time',
  'out',
  'of',
  'mind',
  'they',
  'as',
  'well',
  'as',
  'myself',
  'steadily',
  'following',
  'the',
  'respectable',
  'and',
  'indeed',
  'lucrative',
  'profession',
  'of',
  'mending',
  'of',
  'bellows'],
 ['for',
  'to',
  'speak',
  'the',
  'truth',
  'until',
  'of',
  'late',
  'years',
  'that',
  'the',
  'heads',
  'of',
  'all',
  'the',
  'people',
  'have',
  'been',
  'set',
  'agog',
  'with',
  'politics',
  'no',
  'better',
  'business',
  'than',
  'my',
  'own',
  'could',
  'an',
  'honest',
  'citizen',
  'of',
  'either',
  'desire',
  'or',
  'deserve'],
 ['credit',
  'was',
  'good',
  'employment',
  'was',
  'never',
  'wanting',
  'and',
  'on',
  'all',
  'hands',
  'there',
  'was',
  'no',
  'lack',
  'of',
  'either',
  'money',
  'or',
  'goodwill'],
 ['but',
  'as',
  'i',
  'was',
  'saying',
  'we',
  'soon',
  'began',
  'to',
  'feel',
  'the',
  'effects',
  'of',
  'liberty',
  'and',
  'long',
  'speeches',
  'and',
  'radicalism',
  'and',
  'all',
  'that',
  'sort',
  'of',
  'thing'],
 ['people',
  'who',
  'were',
  'formerly',
  'the',
  'very',
  'best',
  'customers',
  'in',
  'the',
  'world',
  'had',
  'now',
  'not',
  'a',
  'moment',
  'of',
  'time',
  'to',
  'think',
  'of',
  'us',
  'at',
  'all'],
 ['they',
  'had',
  'so',
  'they',
  'said',
  'as',
  'much',
  'as',
  'they',
  'could',
  'do',
  'to',
  'read',
  'about',
  'the',
  'revolutions',
  'and',
  'keep',
  'up',
  'with',
  'the',
  'march',
  'of',
  'intellect',
  'and',
  'the',
  'spirit',
  'of',
  'the',
  'age'],
 ['if',
  'a',
  'fire',
  'wanted',
  'fanning',
  'it',
  'could',
  'readily',
  'be',
  'fanned',
  'with',
  'a',
  'newspaper',
  'and',
  'as',
  'the',
  'government',
  'grew',
  'weaker',
  'i',
  'have',
  'no',
  'doubt',
  'that',
  'leather',
  'and',
  'iron',
  'acquired',
  'durability',
  'in',
  'proportion',
  'for',
  'in',
  'a',
  'very',
  'short',
  'time',
  'there',
  'was',
  'not',
  'a',
  'pair',
  'of',
  'bellows',
  'in',
  'all',
  'that',
  'ever',
  'stood',
  'in',
  'need',
  'of',
  'a',
  'stitch',
  'or',
  'required',
  'the',
  'assistance',
  'of',
  'a',
  'hammer'],
 ['this', 'was', 'a', 'state', 'of', 'things', 'not', 'to', 'be', 'endured'],
 ['i',
  'soon',
  'grew',
  'as',
  'poor',
  'as',
  'a',
  'rat',
  'and',
  'having',
  'a',
  'wife',
  'and',
  'children',
  'to',
  'provide',
  'for',
  'my',
  'burdens',
  'at',
  'length',
  'became',
  'intolerable',
  'and',
  'i',
  'spent',
  'hour',
  'after',
  'hour',
  'in',
  'reflecting',
  'upon',
  'the',
  'most',
  'convenient',
  'method',
  'of',
  'putting',
  'an',
  'end',
  'to',
  'my',
  'life'],
 ['in',
  'the',
  'meantime',
  'left',
  'me',
  'little',
  'leisure',
  'for',
  'contemplation'],
 ['my',
  'house',
  'was',
  'literally',
  'besieged',
  'from',
  'morning',
  'till',
  'night',
  'so',
  'that',
  'i',
  'began',
  'to',
  'rave',
  'and',
  'foam',
  'and',
  'fret',
  'like',
  'a',
  'caged',
  'tiger',
  'against',
  'the',
  'bars',
  'of',
  'his',
  'enclosure'],
 ['there',
  'were',
  'three',
  'fellows',
  'in',
  'particular',
  'who',
  'worried',
  'me',
  'beyond',
  'endurance',
  'keeping',
  'watch',
  'continually',
  'about',
  'my',
  'door',
  'and',
  'threatening',
  'me',
  'with',
  'the',
  'law'],
 ['upon',
  'these',
  'three',
  'i',
  'internally',
  'vowed',
  'the',
  'bitterest',
  'revenge',
  'if',
  'ever',
  'i',
  'should',
  'be',
  'so',
  'happy',
  'as',
  'to',
  'get',
  'them',
  'within',
  'my',
  'clutches',
  'and',
  'i',
  'believe',
  'nothing',
  'in',
  'the',
  'world',
  'but',
  'the',
  'pleasure',
  'of',
  'this',
  'anticipation',
  'prevented',
  'me',
  'from',
  'putting',
  'my',
  'plan',
  'of',
  'suicide',
  'into',
  'immediate',
  'execution',
  'by',
  'blowing',
  'my',
  'brains',
  'out',
  'with',
  'a',
  'blunderbuss'],
 ['i',
  'thought',
  'it',
  'best',
  'however',
  'to',
  'dissemble',
  'my',
  'wrath',
  'and',
  'to',
  'treat',
  'them',
  'with',
  'promises',
  'and',
  'fair',
  'words',
  'until',
  'by',
  'some',
  'good',
  'turn',
  'of',
  'fate',
  'an',
  'opportunity',
  'of',
  'vengeance',
  'should',
  'be',
  'afforded',
  'me'],
 ['one',
  'day',
  'having',
  'given',
  'my',
  'creditors',
  'the',
  'slip',
  'and',
  'feeling',
  'more',
  'than',
  'usually',
  'dejected',
  'i',
  'continued',
  'for',
  'a',
  'long',
  'time',
  'to',
  'wander',
  'about',
  'the',
  'most',
  'obscure',
  'streets',
  'without',
  'object',
  'whatever',
  'until',
  'at',
  'length',
  'i',
  'chanced',
  'to',
  'stumble',
  'against',
  'the',
  'corner',
  'of',
  'a',
  'bookseller',
  's',
  'stall'],
 ['seeing',
  'a',
  'chair',
  'close',
  'at',
  'hand',
  'for',
  'the',
  'use',
  'of',
  'customers',
  'i',
  'threw',
  'myself',
  'doggedly',
  'into',
  'it',
  'and',
  'hardly',
  'knowing',
  'why',
  'opened',
  'the',
  'pages',
  'of',
  'the',
  'first',
  'volume',
  'which',
  'came',
  'within',
  'my',
  'reach'],
 ['it',
  'proved',
  'to',
  'be',
  'a',
  'small',
  'pamphlet',
  'treatise',
  'on',
  'written',
  'either',
  'by',
  'of',
  'or',
  'by',
  'a',
  'of',
  'somewhat',
  'similar',
  'name'],
 ['i',
  'had',
  'some',
  'little',
  'tincture',
  'of',
  'information',
  'on',
  'matters',
  'of',
  'this',
  'nature',
  'and',
  'soon',
  'became',
  'more',
  'and',
  'more',
  'absorbed',
  'in',
  'the',
  'contents',
  'of',
  'the',
  'book',
  'reading',
  'it',
  'actually',
  'through',
  'twice',
  'before',
  'i',
  'awoke',
  'to',
  'a',
  'recollection',
  'of',
  'what',
  'was',
  'passing',
  'around',
  'me'],
 ['by',
  'this',
  'time',
  'it',
  'began',
  'to',
  'grow',
  'dark',
  'and',
  'i',
  'directed',
  'my',
  'steps',
  'toward',
  'home'],
 ['but',
  'the',
  'treatise',
  'had',
  'made',
  'an',
  'indelible',
  'impression',
  'on',
  'my',
  'mind',
  'and',
  'as',
  'i',
  'sauntered',
  'along',
  'the',
  'dusky',
  'streets',
  'i',
  'revolved',
  'carefully',
  'over',
  'in',
  'my',
  'memory',
  'the',
  'wild',
  'and',
  'sometimes',
  'unintelligible',
  'reasonings',
  'of',
  'the',
  'writer'],
 ['there',
  'are',
  'some',
  'particular',
  'passages',
  'which',
  'affected',
  'my',
  'imagination',
  'in',
  'a',
  'powerful',
  'and',
  'extraordinary',
  'manner'],
 ['the',
  'longer',
  'i',
  'meditated',
  'upon',
  'these',
  'the',
  'more',
  'intense',
  'grew',
  'the',
  'interest',
  'which',
  'had',
  'been',
  'excited',
  'within',
  'me'],
 ['the',
  'limited',
  'nature',
  'of',
  'my',
  'education',
  'in',
  'general',
  'and',
  'more',
  'especially',
  'my',
  'ignorance',
  'on',
  'subjects',
  'connected',
  'with',
  'natural',
  'philosophy',
  'so',
  'far',
  'from',
  'rendering',
  'me',
  'diffident',
  'of',
  'my',
  'own',
  'ability',
  'to',
  'comprehend',
  'what',
  'i',
  'had',
  'read',
  'or',
  'inducing',
  'me',
  'to',
  'mistrust',
  'the',
  'many',
  'vague',
  'notions',
  'which',
  'had',
  'arisen',
  'in',
  'consequence',
  'merely',
  'served',
  'as',
  'a',
  'farther',
  'stimulus',
  'to',
  'imagination',
  'and',
  'i',
  'was',
  'vain',
  'enough',
  'or',
  'perhaps',
  'reasonable',
  'enough',
  'to',
  'doubt',
  'whether',
  'those',
  'crude',
  'ideas',
  'which',
  'arising',
  'in',
  'illregulated',
  'minds',
  'have',
  'all',
  'the',
  'appearance',
  'may',
  'not',
  'often',
  'in',
  'effect',
  'possess',
  'all',
  'the',
  'force',
  'the',
  'reality',
  'and',
  'other',
  'inherent',
  'properties',
  'of',
  'instinct',
  'or',
  'intuition',
  'whether',
  'to',
  'proceed',
  'a',
  'step',
  'farther',
  'profundity',
  'itself',
  'might',
  'not',
  'in',
  'matters',
  'of',
  'a',
  'purely',
  'speculative',
  'nature',
  'be',
  'detected',
  'as',
  'a',
  'legitimate',
  'source',
  'of',
  'falsity',
  'and',
  'error'],
 ['in',
  'other',
  'words',
  'i',
  'believed',
  'and',
  'still',
  'do',
  'believe',
  'that',
  'truth',
  'is',
  'frequently',
  'of',
  'its',
  'own',
  'essence',
  'superficial',
  'and',
  'that',
  'in',
  'many',
  'cases',
  'the',
  'depth',
  'lies',
  'more',
  'in',
  'the',
  'abysses',
  'where',
  'we',
  'seek',
  'her',
  'than',
  'in',
  'the',
  'actual',
  'situations',
  'wherein',
  'she',
  'may',
  'be',
  'found'],
 ['nature',
  'herself',
  'seemed',
  'to',
  'afford',
  'me',
  'corroboration',
  'of',
  'these',
  'ideas'],
 ['in',
  'the',
  'contemplation',
  'of',
  'the',
  'heavenly',
  'bodies',
  'it',
  'struck',
  'me',
  'forcibly',
  'that',
  'i',
  'could',
  'not',
  'distinguish',
  'a',
  'star',
  'with',
  'nearly',
  'as',
  'much',
  'precision',
  'when',
  'i',
  'gazed',
  'on',
  'it',
  'with',
  'earnest',
  'direct',
  'and',
  'undeviating',
  'attention',
  'as',
  'when',
  'i',
  'suffered',
  'my',
  'eye',
  'only',
  'to',
  'glance',
  'in',
  'its',
  'vicinity',
  'alone'],
 ['i',
  'was',
  'not',
  'of',
  'course',
  'at',
  'that',
  'time',
  'aware',
  'that',
  'this',
  'apparent',
  'paradox',
  'was',
  'occasioned',
  'by',
  'the',
  'center',
  'of',
  'the',
  'visual',
  'area',
  'being',
  'less',
  'susceptible',
  'of',
  'feeble',
  'impressions',
  'of',
  'light',
  'than',
  'the',
  'exterior',
  'portions',
  'of',
  'the',
  'retina'],
 ['this',
  'knowledge',
  'and',
  'some',
  'of',
  'another',
  'kind',
  'came',
  'afterwards',
  'in',
  'the',
  'course',
  'of',
  'an',
  'eventful',
  'five',
  'years',
  'during',
  'which',
  'i',
  'have',
  'dropped',
  'the',
  'prejudices',
  'of',
  'my',
  'former',
  'humble',
  'situation',
  'in',
  'life',
  'and',
  'forgotten',
  'the',
  'bellowsmender',
  'in',
  'far',
  'different',
  'occupations'],
 ['but',
  'at',
  'the',
  'epoch',
  'of',
  'which',
  'i',
  'speak',
  'the',
  'analogy',
  'which',
  'a',
  'casual',
  'observation',
  'of',
  'a',
  'star',
  'offered',
  'to',
  'the',
  'conclusions',
  'i',
  'had',
  'already',
  'drawn',
  'struck',
  'me',
  'with',
  'the',
  'force',
  'of',
  'positive',
  'conformation',
  'and',
  'i',
  'then',
  'finally',
  'made',
  'up',
  'my',
  'mind',
  'to',
  'the',
  'course',
  'which',
  'i',
  'afterwards',
  'pursued'],
 ['it',
  'was',
  'late',
  'when',
  'i',
  'reached',
  'home',
  'and',
  'i',
  'went',
  'immediately',
  'to',
  'bed'],
 ['my',
  'mind',
  'however',
  'was',
  'too',
  'much',
  'occupied',
  'to',
  'sleep',
  'and',
  'i',
  'lay',
  'the',
  'whole',
  'night',
  'buried',
  'in',
  'meditation'],
 ['arising',
  'early',
  'in',
  'the',
  'morning',
  'and',
  'contriving',
  'again',
  'to',
  'escape',
  'the',
  'vigilance',
  'of',
  'my',
  'creditors',
  'i',
  'repaired',
  'eagerly',
  'to',
  'the',
  'bookseller',
  's',
  'stall',
  'and',
  'laid',
  'out',
  'what',
  'little',
  'ready',
  'money',
  'i',
  'possessed',
  'in',
  'the',
  'purchase',
  'of',
  'some',
  'volumes',
  'of',
  'mechanics',
  'and'],
 ['having',
  'arrived',
  'at',
  'home',
  'safely',
  'with',
  'these',
  'i',
  'devoted',
  'every',
  'spare',
  'moment',
  'to',
  'their',
  'perusal',
  'and',
  'soon',
  'made',
  'such',
  'proficiency',
  'in',
  'studies',
  'of',
  'this',
  'nature',
  'as',
  'i',
  'thought',
  'sufficient',
  'for',
  'the',
  'execution',
  'of',
  'my',
  'plan'],
 ['in',
  'the',
  'intervals',
  'of',
  'this',
  'period',
  'i',
  'made',
  'every',
  'endeavor',
  'to',
  'conciliate',
  'the',
  'three',
  'creditors',
  'who',
  'had',
  'given',
  'me',
  'so',
  'much',
  'annoyance'],
 ['in',
  'this',
  'i',
  'finally',
  'succeeded',
  'partly',
  'by',
  'selling',
  'enough',
  'of',
  'my',
  'household',
  'furniture',
  'to',
  'satisfy',
  'a',
  'moiety',
  'of',
  'their',
  'claim',
  'and',
  'partly',
  'by',
  'a',
  'promise',
  'of',
  'paying',
  'the',
  'balance',
  'upon',
  'completion',
  'of',
  'a',
  'little',
  'project',
  'which',
  'i',
  'told',
  'them',
  'i',
  'had',
  'in',
  'view',
  'and',
  'for',
  'assistance',
  'in',
  'which',
  'i',
  'solicited',
  'their',
  'services'],
 ['by',
  'these',
  'means',
  'for',
  'they',
  'were',
  'ignorant',
  'men',
  'i',
  'found',
  'little',
  'difficulty',
  'in',
  'gaining',
  'them',
  'over',
  'to',
  'my',
  'purpose'],
 ['matters',
  'being',
  'thus',
  'arranged',
  'i',
  'contrived',
  'by',
  'the',
  'aid',
  'of',
  'my',
  'wife',
  'and',
  'with',
  'the',
  'greatest',
  'secrecy',
  'and',
  'caution',
  'to',
  'dispose',
  'of',
  'what',
  'property',
  'i',
  'had',
  'remaining',
  'and',
  'to',
  'borrow',
  'in',
  'small',
  'sums',
  'under',
  'various',
  'pretences',
  'and',
  'without',
  'paying',
  'any',
  'attention',
  'to',
  'my',
  'future',
  'means',
  'of',
  'repayment',
  'no',
  'inconsiderable',
  'quantity',
  'of',
  'ready',
  'money'],
 ['with',
  'the',
  'means',
  'thus',
  'accruing',
  'i',
  'proceeded',
  'to',
  'procure',
  'at',
  'intervals',
  'cambric',
  'muslin',
  'very',
  'fine',
  'in',
  'pieces',
  'of',
  'twelve',
  'yards',
  'each',
  'twine',
  'a',
  'lot',
  'of',
  'the',
  'varnish',
  'of',
  'caoutchouc',
  'a',
  'large',
  'and',
  'deep',
  'basket',
  'of',
  'wickerwork',
  'made',
  'to',
  'order',
  'and',
  'several',
  'other',
  'articles',
  'necessary',
  'in',
  'the',
  'construction',
  'and',
  'equipment',
  'of',
  'a',
  'balloon',
  'of',
  'extraordinary',
  'dimensions'],
 ['this',
  'i',
  'directed',
  'my',
  'wife',
  'to',
  'make',
  'up',
  'as',
  'soon',
  'as',
  'possible',
  'and',
  'gave',
  'her',
  'all',
  'requisite',
  'information',
  'as',
  'to',
  'the',
  'particular',
  'method',
  'of',
  'proceeding'],
 ['in',
  'the',
  'meantime',
  'i',
  'worked',
  'up',
  'the',
  'twine',
  'into',
  'a',
  'network',
  'of',
  'sufficient',
  'dimensions',
  'rigged',
  'it',
  'with',
  'a',
  'hoop',
  'and',
  'the',
  'necessary',
  'cords',
  'bought',
  'a',
  'quadrant',
  'a',
  'compass',
  'a',
  'spyglass',
  'a',
  'common',
  'barometer',
  'with',
  'some',
  'important',
  'modifications',
  'and',
  'two',
  'astronomical',
  'instruments',
  'not',
  'so',
  'generally',
  'known'],
 ['i',
  'then',
  'took',
  'opportunities',
  'of',
  'conveying',
  'by',
  'night',
  'to',
  'a',
  'retired',
  'situation',
  'east',
  'of',
  'five',
  'ironbound',
  'casks',
  'to',
  'contain',
  'about',
  'fifty',
  'gallons',
  'each',
  'and',
  'one',
  'of',
  'a',
  'larger',
  'size',
  'six',
  'tinned',
  'ware',
  'tubes',
  'three',
  'inches',
  'in',
  'diameter',
  'properly',
  'shaped',
  'and',
  'ten',
  'feet',
  'in',
  'length',
  'a',
  'quantity',
  'of',
  'a',
  'particular',
  'metallic',
  'substance',
  'or',
  'semimetal',
  'which',
  'i',
  'shall',
  'not',
  'name',
  'and',
  'a',
  'dozen',
  'demijohns',
  'of',
  'a',
  'very',
  'common',
  'acid'],
 ['the',
  'gas',
  'to',
  'be',
  'formed',
  'from',
  'these',
  'latter',
  'materials',
  'is',
  'a',
  'gas',
  'never',
  'yet',
  'generated',
  'by',
  'any',
  'other',
  'person',
  'than',
  'myself',
  'or',
  'at',
  'least',
  'never',
  'applied',
  'to',
  'any',
  'similar',
  'purpose'],
 ['the',
  'secret',
  'i',
  'would',
  'make',
  'no',
  'difficulty',
  'in',
  'disclosing',
  'but',
  'that',
  'it',
  'of',
  'right',
  'belongs',
  'to',
  'a',
  'citizen',
  'of',
  'in',
  'by',
  'whom',
  'it',
  'was',
  'conditionally',
  'communicated',
  'to',
  'myself'],
 ['the',
  'same',
  'individual',
  'submitted',
  'to',
  'me',
  'without',
  'being',
  'at',
  'all',
  'aware',
  'of',
  'my',
  'intentions',
  'a',
  'method',
  'of',
  'constructing',
  'balloons',
  'from',
  'the',
  'membrane',
  'of',
  'a',
  'certain',
  'animal',
  'through',
  'which',
  'substance',
  'any',
  'escape',
  'of',
  'gas',
  'was',
  'nearly',
  'an',
  'impossibility'],
 ['i',
  'found',
  'it',
  'however',
  'altogether',
  'too',
  'expensive',
  'and',
  'was',
  'not',
  'sure',
  'upon',
  'the',
  'whole',
  'whether',
  'cambric',
  'muslin',
  'with',
  'a',
  'coating',
  'of',
  'gum',
  'caoutchouc',
  'was',
  'not',
  'equally',
  'as',
  'good'],
 ['i',
  'mention',
  'this',
  'circumstance',
  'because',
  'i',
  'think',
  'it',
  'probable',
  'that',
  'hereafter',
  'the',
  'individual',
  'in',
  'question',
  'may',
  'attempt',
  'a',
  'balloon',
  'ascension',
  'with',
  'the',
  'novel',
  'gas',
  'and',
  'material',
  'i',
  'have',
  'spoken',
  'of',
  'and',
  'i',
  'do',
  'not',
  'wish',
  'to',
  'deprive',
  'him',
  'of',
  'the',
  'honor',
  'of',
  'a',
  'very',
  'singular',
  'invention'],
 ['on',
  'the',
  'spot',
  'which',
  'i',
  'intended',
  'each',
  'of',
  'the',
  'smaller',
  'casks',
  'to',
  'occupy',
  'respectively',
  'during',
  'the',
  'inflation',
  'of',
  'the',
  'balloon',
  'i',
  'privately',
  'dug',
  'a',
  'hole',
  'two',
  'feet',
  'deep',
  'the',
  'holes',
  'forming',
  'in',
  'this',
  'manner',
  'a',
  'circle',
  'twentyfive',
  'feet',
  'in',
  'diameter'],
 ['in',
  'the',
  'centre',
  'of',
  'this',
  'circle',
  'being',
  'the',
  'station',
  'designed',
  'for',
  'the',
  'large',
  'cask',
  'i',
  'also',
  'dug',
  'a',
  'hole',
  'three',
  'feet',
  'in',
  'depth'],
 ['in',
  'each',
  'of',
  'the',
  'five',
  'smaller',
  'holes',
  'i',
  'deposited',
  'a',
  'canister',
  'containing',
  'fifty',
  'pounds',
  'and',
  'in',
  'the',
  'larger',
  'one',
  'a',
  'keg',
  'holding',
  'one',
  'hundred',
  'and',
  'fifty',
  'pounds',
  'of',
  'cannon',
  'powder'],
 ['these',
  'the',
  'keg',
  'and',
  'canisters',
  'i',
  'connected',
  'in',
  'a',
  'proper',
  'manner',
  'with',
  'covered',
  'trains',
  'and',
  'having',
  'let',
  'into',
  'one',
  'of',
  'the',
  'canisters',
  'the',
  'end',
  'of',
  'about',
  'four',
  'feet',
  'of',
  'slow',
  'match',
  'i',
  'covered',
  'up',
  'the',
  'hole',
  'and',
  'placed',
  'the',
  'cask',
  'over',
  'it',
  'leaving',
  'the',
  'other',
  'end',
  'of',
  'the',
  'match',
  'protruding',
  'about',
  'an',
  'inch',
  'and',
  'barely',
  'visible',
  'beyond',
  'the',
  'cask'],
 ['i',
  'then',
  'filled',
  'up',
  'the',
  'remaining',
  'holes',
  'and',
  'placed',
  'the',
  'barrels',
  'over',
  'them',
  'in',
  'their',
  'destined',
  'situation'],
 ['besides',
  'the',
  'articles',
  'above',
  'enumerated',
  'i',
  'conveyed',
  'to',
  'the',
  'depot',
  'and',
  'there',
  'secreted',
  'one',
  'of',
  's',
  'improvements',
  'upon',
  'the',
  'apparatus',
  'for',
  'condensation',
  'of',
  'the',
  'atmospheric',
  'air'],
 ['i',
  'found',
  'this',
  'machine',
  'however',
  'to',
  'require',
  'considerable',
  'alteration',
  'before',
  'it',
  'could',
  'be',
  'adapted',
  'to',
  'the',
  'purposes',
  'to',
  'which',
  'i',
  'intended',
  'making',
  'it',
  'applicable'],
 ['but',
  'with',
  'severe',
  'labor',
  'and',
  'unremitting',
  'perseverance',
  'i',
  'at',
  'length',
  'met',
  'with',
  'entire',
  'success',
  'in',
  'all',
  'my',
  'preparations'],
 ['my', 'balloon', 'was', 'soon', 'completed'],
 ['it',
  'would',
  'contain',
  'more',
  'than',
  'forty',
  'thousand',
  'cubic',
  'feet',
  'of',
  'gas',
  'would',
  'take',
  'me',
  'up',
  'easily',
  'i',
  'calculated',
  'with',
  'all',
  'my',
  'implements',
  'and',
  'if',
  'i',
  'managed',
  'rightly',
  'with',
  'one',
  'hundred',
  'and',
  'seventyfive',
  'pounds',
  'of',
  'ballast',
  'into',
  'the',
  'bargain'],
 ['it',
  'had',
  'received',
  'three',
  'coats',
  'of',
  'varnish',
  'and',
  'i',
  'found',
  'the',
  'cambric',
  'muslin',
  'to',
  'answer',
  'all',
  'the',
  'purposes',
  'of',
  'silk',
  'itself',
  'quite',
  'as',
  'strong',
  'and',
  'a',
  'good',
  'deal',
  'less',
  'expensive'],
 ['everything',
  'being',
  'now',
  'ready',
  'i',
  'exacted',
  'from',
  'my',
  'wife',
  'an',
  'oath',
  'of',
  'secrecy',
  'in',
  'relation',
  'to',
  'all',
  'my',
  'actions',
  'from',
  'the',
  'day',
  'of',
  'my',
  'first',
  'visit',
  'to',
  'the',
  'bookseller',
  's',
  'stall',
  'and',
  'promising',
  'on',
  'my',
  'part',
  'to',
  'return',
  'as',
  'soon',
  'as',
  'circumstances',
  'would',
  'permit',
  'i',
  'gave',
  'her',
  'what',
  'little',
  'money',
  'i',
  'had',
  'left',
  'and',
  'bade',
  'her',
  'farewell'],
 ['indeed', 'i', 'had', 'no', 'fear', 'on', 'her', 'account'],
 ['she',
  'was',
  'what',
  'people',
  'call',
  'a',
  'notable',
  'woman',
  'and',
  'could',
  'manage',
  'matters',
  'in',
  'the',
  'world',
  'without',
  'my',
  'assistance'],
 ['i',
  'believe',
  'to',
  'tell',
  'the',
  'truth',
  'she',
  'always',
  'looked',
  'upon',
  'me',
  'as',
  'an',
  'idle',
  'boy',
  'a',
  'mere',
  'makeweight',
  'good',
  'for',
  'nothing',
  'but',
  'building',
  'castles',
  'in',
  'the',
  'air',
  'and',
  'was',
  'rather',
  'glad',
  'to',
  'get',
  'rid',
  'of',
  'me'],
 ['it',
  'was',
  'a',
  'dark',
  'night',
  'when',
  'i',
  'bade',
  'her',
  'goodbye',
  'and',
  'taking',
  'with',
  'me',
  'as',
  'aidesdecamp',
  'the',
  'three',
  'creditors',
  'who',
  'had',
  'given',
  'me',
  'so',
  'much',
  'trouble',
  'we',
  'carried',
  'the',
  'balloon',
  'with',
  'the',
  'car',
  'and',
  'accoutrements',
  'by',
  'a',
  'roundabout',
  'way',
  'to',
  'the',
  'station',
  'where',
  'the',
  'other',
  'articles',
  'were',
  'deposited'],
 ['we',
  'there',
  'found',
  'them',
  'all',
  'unmolested',
  'and',
  'i',
  'proceeded',
  'immediately',
  'to',
  'business'],
 ['it', 'was', 'the', 'first', 'of'],
 ['the',
  'night',
  'as',
  'i',
  'said',
  'before',
  'was',
  'dark',
  'there',
  'was',
  'not',
  'a',
  'star',
  'to',
  'be',
  'seen',
  'and',
  'a',
  'drizzling',
  'rain',
  'falling',
  'at',
  'intervals',
  'rendered',
  'us',
  'very',
  'uncomfortable'],
 ['but',
  'my',
  'chief',
  'anxiety',
  'was',
  'concerning',
  'the',
  'balloon',
  'which',
  'in',
  'spite',
  'of',
  'the',
  'varnish',
  'with',
  'which',
  'it',
  'was',
  'defended',
  'began',
  'to',
  'grow',
  'rather',
  'heavy',
  'with',
  'the',
  'moisture',
  'the',
  'powder',
  'also',
  'was',
  'liable',
  'to',
  'damage'],
 ['i',
  'therefore',
  'kept',
  'my',
  'three',
  'duns',
  'working',
  'with',
  'great',
  'diligence',
  'pounding',
  'down',
  'ice',
  'around',
  'the',
  'central',
  'cask',
  'and',
  'stirring',
  'the',
  'acid',
  'in',
  'the',
  'others'],
 ['they',
  'did',
  'not',
  'cease',
  'however',
  'importuning',
  'me',
  'with',
  'questions',
  'as',
  'to',
  'what',
  'i',
  'intended',
  'to',
  'do',
  'with',
  'all',
  'this',
  'apparatus',
  'and',
  'expressed',
  'much',
  'dissatisfaction',
  'at',
  'the',
  'terrible',
  'labor',
  'i',
  'made',
  'them',
  'undergo'],
 ['they',
  'could',
  'not',
  'perceive',
  'so',
  'they',
  'said',
  'what',
  'good',
  'was',
  'likely',
  'to',
  'result',
  'from',
  'their',
  'getting',
  'wet',
  'to',
  'the',
  'skin',
  'merely',
  'to',
  'take',
  'a',
  'part',
  'in',
  'such',
  'horrible',
  'incantations'],
 ['i',
  'began',
  'to',
  'get',
  'uneasy',
  'and',
  'worked',
  'away',
  'with',
  'all',
  'my',
  'might',
  'for',
  'i',
  'verily',
  'believe',
  'the',
  'idiots',
  'supposed',
  'that',
  'i',
  'had',
  'entered',
  'into',
  'a',
  'compact',
  'with',
  'the',
  'devil',
  'and',
  'that',
  'in',
  'short',
  'what',
  'i',
  'was',
  'now',
  'doing',
  'was',
  'nothing',
  'better',
  'than',
  'it',
  'should',
  'be'],
 ['i',
  'was',
  'therefore',
  'in',
  'great',
  'fear',
  'of',
  'their',
  'leaving',
  'me',
  'altogether'],
 ['i',
  'contrived',
  'however',
  'to',
  'pacify',
  'them',
  'by',
  'promises',
  'of',
  'payment',
  'of',
  'all',
  'scores',
  'in',
  'full',
  'as',
  'soon',
  'as',
  'i',
  'could',
  'bring',
  'the',
  'present',
  'business',
  'to',
  'a',
  'termination'],
 ['to',
  'these',
  'speeches',
  'they',
  'gave',
  'of',
  'course',
  'their',
  'own',
  'interpretation',
  'fancying',
  'no',
  'doubt',
  'that',
  'at',
  'all',
  'events',
  'i',
  'should',
  'come',
  'into',
  'possession',
  'of',
  'vast',
  'quantities',
  'of',
  'ready',
  'money',
  'and',
  'provided',
  'i',
  'paid',
  'them',
  'all',
  'i',
  'owed',
  'and',
  'a',
  'trifle',
  'more',
  'in',
  'consideration',
  'of',
  'their',
  'services',
  'i',
  'dare',
  'say',
  'they',
  'cared',
  'very',
  'little',
  'what',
  'became',
  'of',
  'either',
  'my',
  'soul',
  'or',
  'my',
  'carcass'],
 ['in',
  'about',
  'four',
  'hours',
  'and',
  'a',
  'half',
  'i',
  'found',
  'the',
  'balloon',
  'sufficiently',
  'inflated'],
 ['i',
  'attached',
  'the',
  'car',
  'therefore',
  'and',
  'put',
  'all',
  'my',
  'implements',
  'in',
  'it',
  'not',
  'forgetting',
  'the',
  'condensing',
  'apparatus',
  'a',
  'copious',
  'supply',
  'of',
  'water',
  'and',
  'a',
  'large',
  'quantity',
  'of',
  'provisions',
  'such',
  'as',
  'pemmican',
  'in',
  'which',
  'much',
  'nutriment',
  'is',
  'contained',
  'in',
  'comparatively',
  'little',
  'bulk'],
 ['i',
  'also',
  'secured',
  'in',
  'the',
  'car',
  'a',
  'pair',
  'of',
  'pigeons',
  'and',
  'a',
  'cat'],
 ['it',
  'was',
  'now',
  'nearly',
  'daybreak',
  'and',
  'i',
  'thought',
  'it',
  'high',
  'time',
  'to',
  'take',
  'my',
  'departure'],
 ['dropping',
  'a',
  'lighted',
  'cigar',
  'on',
  'the',
  'ground',
  'as',
  'if',
  'by',
  'accident',
  'i',
  'took',
  'the',
  'opportunity',
  'in',
  'stooping',
  'to',
  'pick',
  'it',
  'up',
  'of',
  'igniting',
  'privately',
  'the',
  'piece',
  'of',
  'slow',
  'match',
  'whose',
  'end',
  'as',
  'i',
  'said',
  'before',
  'protruded',
  'a',
  'very',
  'little',
  'beyond',
  'the',
  'lower',
  'rim',
  'of',
  'one',
  'of',
  'the',
  'smaller',
  'casks'],
 ['this',
  'manoeuvre',
  'was',
  'totally',
  'unperceived',
  'on',
  'the',
  'part',
  'of',
  'the',
  'three',
  'duns',
  'and',
  'jumping',
  'into',
  'the',
  'car',
  'i',
  'immediately',
  'cut',
  'the',
  'single',
  'cord',
  'which',
  'held',
  'me',
  'to',
  'the',
  'earth',
  'and',
  'was',
  'pleased',
  'to',
  'find',
  'that',
  'i',
  'shot',
  'upward',
  'carrying',
  'with',
  'all',
  'ease',
  'one',
  'hundred',
  'and',
  'seventyfive',
  'pounds',
  'of',
  'leaden',
  'ballast',
  'and',
  'able',
  'to',
  'have',
  'carried',
  'up',
  'as',
  'many',
  'more'],
 ['scarcely',
  'however',
  'had',
  'i',
  'attained',
  'the',
  'height',
  'of',
  'fifty',
  'yards',
  'when',
  'roaring',
  'and',
  'rumbling',
  'up',
  'after',
  'me',
  'in',
  'the',
  'most',
  'horrible',
  'and',
  'tumultuous',
  'manner',
  'came',
  'so',
  'dense',
  'a',
  'hurricane',
  'of',
  'fire',
  'and',
  'smoke',
  'and',
  'sulphur',
  'and',
  'legs',
  'and',
  'arms',
  'and',
  'gravel',
  'and',
  'burning',
  'wood',
  'and',
  'blazing',
  'metal',
  'that',
  'my',
  'very',
  'heart',
  'sunk',
  'within',
  'me',
  'and',
  'i',
  'fell',
  'down',
  'in',
  'the',
  'bottom',
  'of',
  'the',
  'car',
  'trembling',
  'with',
  'unmitigated',
  'terror'],
 ['indeed',
  'i',
  'now',
  'perceived',
  'that',
  'i',
  'had',
  'entirely',
  'overdone',
  'the',
  'business',
  'and',
  'that',
  'the',
  'main',
  'consequences',
  'of',
  'the',
  'shock',
  'were',
  'yet',
  'to',
  'be',
  'experienced'],
 ['accordingly',
  'in',
  'less',
  'than',
  'a',
  'second',
  'i',
  'felt',
  'all',
  'the',
  'blood',
  'in',
  'my',
  'body',
  'rushing',
  'to',
  'my',
  'temples',
  'and',
  'immediately',
  'thereupon',
  'a',
  'concussion',
  'which',
  'i',
  'shall',
  'never',
  'forget',
  'burst',
  'abruptly',
  'through',
  'the',
  'night',
  'and',
  'seemed',
  'to',
  'rip',
  'the',
  'very',
  'firmament',
  'asunder'],
 ['when',
  'i',
  'afterward',
  'had',
  'time',
  'for',
  'reflection',
  'i',
  'did',
  'not',
  'fail',
  'to',
  'attribute',
  'the',
  'extreme',
  'violence',
  'of',
  'the',
  'explosion',
  'as',
  'regarded',
  'myself',
  'to',
  'its',
  'proper',
  'cause',
  'my',
  'situation',
  'directly',
  'above',
  'it',
  'and',
  'in',
  'the',
  'line',
  'of',
  'its',
  'greatest',
  'power'],
 ['but',
  'at',
  'the',
  'time',
  'i',
  'thought',
  'only',
  'of',
  'preserving',
  'my',
  'life'],
 ['the',
  'balloon',
  'at',
  'first',
  'collapsed',
  'then',
  'furiously',
  'expanded',
  'then',
  'whirled',
  'round',
  'and',
  'round',
  'with',
  'horrible',
  'velocity',
  'and',
  'finally',
  'reeling',
  'and',
  'staggering',
  'like',
  'a',
  'drunken',
  'man',
  'hurled',
  'me',
  'with',
  'great',
  'force',
  'over',
  'the',
  'rim',
  'of',
  'the',
  'car',
  'and',
  'left',
  'me',
  'dangling',
  'at',
  'a',
  'terrific',
  'height',
  'with',
  'my',
  'head',
  'downward',
  'and',
  'my',
  'face',
  'outwards',
  'by',
  'a',
  'piece',
  'of',
  'slender',
  'cord',
  'about',
  'three',
  'feet',
  'in',
  'length',
  'which',
  'hung',
  'accidentally',
  'through',
  'a',
  'crevice',
  'near',
  'the',
  'bottom',
  'of',
  'the',
  'wickerwork',
  'and',
  'in',
  'which',
  'as',
  'i',
  'fell',
  'my',
  'left',
  'foot',
  'became',
  'most',
  'providentially',
  'entangled'],
 ['it',
  'is',
  'impossible',
  'utterly',
  'impossible',
  'to',
  'form',
  'any',
  'adequate',
  'idea',
  'of',
  'the',
  'horror',
  'of',
  'my',
  'situation'],
 ['i',
  'gasped',
  'convulsively',
  'for',
  'breath',
  'a',
  'shudder',
  'resembling',
  'a',
  'fit',
  'of',
  'the',
  'ague',
  'agitated',
  'every',
  'nerve',
  'and',
  'muscle',
  'of',
  'my',
  'frame',
  'i',
  'felt',
  'my',
  'eyes',
  'starting',
  'from',
  'their',
  'sockets',
  'a',
  'horrible',
  'nausea',
  'overwhelmed',
  'me',
  'and',
  'at',
  'length',
  'i',
  'fainted',
  'away'],
 ['how',
  'long',
  'i',
  'remained',
  'in',
  'this',
  'state',
  'it',
  'is',
  'impossible',
  'to',
  'say'],
 ['it',
  'must',
  'however',
  'have',
  'been',
  'no',
  'inconsiderable',
  'time',
  'for',
  'when',
  'i',
  'partially',
  'recovered',
  'the',
  'sense',
  'of',
  'existence',
  'i',
  'found',
  'the',
  'day',
  'breaking',
  'the',
  'balloon',
  'at',
  'a',
  'prodigious',
  'height',
  'over',
  'a',
  'wilderness',
  'of',
  'ocean',
  'and',
  'not',
  'a',
  'trace',
  'of',
  'land',
  'to',
  'be',
  'discovered',
  'far',
  'and',
  'wide',
  'within',
  'the',
  'limits',
  'of',
  'the',
  'vast',
  'horizon'],
 ['my',
  'sensations',
  'however',
  'upon',
  'thus',
  'recovering',
  'were',
  'by',
  'no',
  'means',
  'so',
  'rife',
  'with',
  'agony',
  'as',
  'might',
  'have',
  'been',
  'anticipated'],
 ['indeed',
  'there',
  'was',
  'much',
  'of',
  'incipient',
  'madness',
  'in',
  'the',
  'calm',
  'survey',
  'which',
  'i',
  'began',
  'to',
  'take',
  'of',
  'my',
  'situation'],
 ['i',
  'drew',
  'up',
  'to',
  'my',
  'eyes',
  'each',
  'of',
  'my',
  'hands',
  'one',
  'after',
  'the',
  'other',
  'and',
  'wondered',
  'what',
  'occurrence',
  'could',
  'have',
  'given',
  'rise',
  'to',
  'the',
  'swelling',
  'of',
  'the',
  'veins',
  'and',
  'the',
  'horrible',
  'blackness',
  'of',
  'the',
  'fingernails'],
 ['i',
  'afterward',
  'carefully',
  'examined',
  'my',
  'head',
  'shaking',
  'it',
  'repeatedly',
  'and',
  'feeling',
  'it',
  'with',
  'minute',
  'attention',
  'until',
  'i',
  'succeeded',
  'in',
  'satisfying',
  'myself',
  'that',
  'it',
  'was',
  'not',
  'as',
  'i',
  'had',
  'more',
  'than',
  'half',
  'suspected',
  'larger',
  'than',
  'my',
  'balloon'],
 ['then',
  'in',
  'a',
  'knowing',
  'manner',
  'i',
  'felt',
  'in',
  'both',
  'my',
  'breeches',
  'pockets',
  'and',
  'missing',
  'therefrom',
  'a',
  'set',
  'of',
  'tablets',
  'and',
  'a',
  'toothpick',
  'case',
  'endeavored',
  'to',
  'account',
  'for',
  'their',
  'disappearance',
  'and',
  'not',
  'being',
  'able',
  'to',
  'do',
  'so',
  'felt',
  'inexpressibly',
  'chagrined'],
 ['it',
  'now',
  'occurred',
  'to',
  'me',
  'that',
  'i',
  'suffered',
  'great',
  'uneasiness',
  'in',
  'the',
  'joint',
  'of',
  'my',
  'left',
  'ankle',
  'and',
  'a',
  'dim',
  'consciousness',
  'of',
  'my',
  'situation',
  'began',
  'to',
  'glimmer',
  'through',
  'my',
  'mind'],
 ['but', 'strange', 'to', 'say'],
 ['i', 'was', 'neither', 'astonished', 'nor', 'horrorstricken'],
 ['if',
  'i',
  'felt',
  'any',
  'emotion',
  'at',
  'all',
  'it',
  'was',
  'a',
  'kind',
  'of',
  'chuckling',
  'satisfaction',
  'at',
  'the',
  'cleverness',
  'i',
  'was',
  'about',
  'to',
  'display',
  'in',
  'extricating',
  'myself',
  'from',
  'this',
  'dilemma',
  'and',
  'i',
  'never',
  'for',
  'a',
  'moment',
  'looked',
  'upon',
  'my',
  'ultimate',
  'safety',
  'as',
  'a',
  'question',
  'susceptible',
  'of',
  'doubt'],
 ['for',
  'a',
  'few',
  'minutes',
  'i',
  'remained',
  'wrapped',
  'in',
  'the',
  'profoundest',
  'meditation'],
 ['i',
  'have',
  'a',
  'distinct',
  'recollection',
  'of',
  'frequently',
  'compressing',
  'my',
  'lips',
  'putting',
  'my',
  'forefinger',
  'to',
  'the',
  'side',
  'of',
  'my',
  'nose',
  'and',
  'making',
  'use',
  'of',
  'other',
  'gesticulations',
  'and',
  'grimaces',
  'common',
  'to',
  'men',
  'who',
  'at',
  'ease',
  'in',
  'their',
  'armchairs',
  'meditate',
  'upon',
  'matters',
  'of',
  'intricacy',
  'or',
  'importance'],
 ['having',
  'as',
  'i',
  'thought',
  'sufficiently',
  'collected',
  'my',
  'ideas',
  'i',
  'now',
  'with',
  'great',
  'caution',
  'and',
  'deliberation',
  'put',
  'my',
  'hands',
  'behind',
  'my',
  'back',
  'and',
  'unfastened',
  'the',
  'large',
  'iron',
  'buckle',
  'which',
  'belonged',
  'to',
  'the',
  'waistband',
  'of',
  'my',
  'inexpressibles'],
 ['this',
  'buckle',
  'had',
  'three',
  'teeth',
  'which',
  'being',
  'somewhat',
  'rusty',
  'turned',
  'with',
  'great',
  'difficulty',
  'on',
  'their',
  'axis'],
 ['i',
  'brought',
  'them',
  'however',
  'after',
  'some',
  'trouble',
  'at',
  'right',
  'angles',
  'to',
  'the',
  'body',
  'of',
  'the',
  'buckle',
  'and',
  'was',
  'glad',
  'to',
  'find',
  'them',
  'remain',
  'firm',
  'in',
  'that',
  'position'],
 ['holding',
  'the',
  'instrument',
  'thus',
  'obtained',
  'within',
  'my',
  'teeth',
  'i',
  'now',
  'proceeded',
  'to',
  'untie',
  'the',
  'knot',
  'of',
  'my',
  'cravat'],
 ['i',
  'had',
  'to',
  'rest',
  'several',
  'times',
  'before',
  'i',
  'could',
  'accomplish',
  'this',
  'manoeuvre',
  'but',
  'it',
  'was',
  'at',
  'length',
  'accomplished'],
 ['to',
  'one',
  'end',
  'of',
  'the',
  'cravat',
  'i',
  'then',
  'made',
  'fast',
  'the',
  'buckle',
  'and',
  'the',
  'other',
  'end',
  'i',
  'tied',
  'for',
  'greater',
  'security',
  'tightly',
  'around',
  'my',
  'wrist'],
 ['drawing',
  'now',
  'my',
  'body',
  'upwards',
  'with',
  'a',
  'prodigious',
  'exertion',
  'of',
  'muscular',
  'force',
  'i',
  'succeeded',
  'at',
  'the',
  'very',
  'first',
  'trial',
  'in',
  'throwing',
  'the',
  'buckle',
  'over',
  'the',
  'car',
  'and',
  'entangling',
  'it',
  'as',
  'i',
  'had',
  'anticipated',
  'in',
  'the',
  'circular',
  'rim',
  'of',
  'the',
  'wickerwork'],
 ['my',
  'body',
  'was',
  'now',
  'inclined',
  'towards',
  'the',
  'side',
  'of',
  'the',
  'car',
  'at',
  'an',
  'angle',
  'of',
  'about',
  'fortyfive',
  'degrees',
  'but',
  'it',
  'must',
  'not',
  'be',
  'understood',
  'that',
  'i',
  'was',
  'therefore',
  'only',
  'fortyfive',
  'degrees',
  'below',
  'the',
  'perpendicular'],
 ['so',
  'far',
  'from',
  'it',
  'i',
  'still',
  'lay',
  'nearly',
  'level',
  'with',
  'the',
  'plane',
  'of',
  'the',
  'horizon',
  'for',
  'the',
  'change',
  'of',
  'situation',
  'which',
  'i',
  'had',
  'acquired',
  'had',
  'forced',
  'the',
  'bottom',
  'of',
  'the',
  'car',
  'considerably',
  'outwards',
  'from',
  'my',
  'position',
  'which',
  'was',
  'accordingly',
  'one',
  'of',
  'the',
  'most',
  'imminent',
  'and',
  'deadly',
  'peril'],
 ['it',
  'should',
  'be',
  'remembered',
  'however',
  'that',
  'when',
  'i',
  'fell',
  'in',
  'the',
  'first',
  'instance',
  'from',
  'the',
  'car',
  'if',
  'i',
  'had',
  'fallen',
  'with',
  'my',
  'face',
  'turned',
  'toward',
  'the',
  'balloon',
  'instead',
  'of',
  'turned',
  'outwardly',
  'from',
  'it',
  'as',
  'it',
  'actually',
  'was',
  'or',
  'if',
  'in',
  'the',
  'second',
  'place',
  'the',
  'cord',
  'by',
  'which',
  'i',
  'was',
  'suspended',
  'had',
  'chanced',
  'to',
  'hang',
  'over',
  'the',
  'upper',
  'edge',
  'instead',
  'of',
  'through',
  'a',
  'crevice',
  'near',
  'the',
  'bottom',
  'of',
  'the',
  'car',
  'i',
  'say',
  'it',
  'may',
  'be',
  'readily',
  'conceived',
  'that',
  'in',
  'either',
  'of',
  'these',
  'supposed',
  'cases',
  'i',
  'should',
  'have',
  'been',
  'unable',
  'to',
  'accomplish',
  'even',
  'as',
  'much',
  'as',
  'i',
  'had',
  'now',
  'accomplished',
  'and',
  'the',
  'wonderful',
  'adventures',
  'of',
  'would',
  'have',
  'been',
  'utterly',
  'lost',
  'to',
  'posterity',
  'i',
  'had',
  'therefore',
  'every',
  'reason',
  'to',
  'be',
  'grateful',
  'although',
  'in',
  'point',
  'of',
  'fact',
  'i',
  'was',
  'still',
  'too',
  'stupid',
  'to',
  'be',
  'anything',
  'at',
  'all',
  'and',
  'hung',
  'for',
  'perhaps',
  'a',
  'quarter',
  'of',
  'an',
  'hour',
  'in',
  'that',
  'extraordinary',
  'manner',
  'without',
  'making',
  'the',
  'slightest',
  'farther',
  'exertion',
  'whatsoever',
  'and',
  'in',
  'a',
  'singularly',
  'tranquil',
  'state',
  'of',
  'idiotic',
  'enjoyment'],
 ['but',
  'this',
  'feeling',
  'did',
  'not',
  'fail',
  'to',
  'die',
  'rapidly',
  'away',
  'and',
  'thereunto',
  'succeeded',
  'horror',
  'and',
  'dismay',
  'and',
  'a',
  'chilling',
  'sense',
  'of',
  'utter',
  'helplessness',
  'and',
  'ruin'],
 ['in',
  'fact',
  'the',
  'blood',
  'so',
  'long',
  'accumulating',
  'in',
  'the',
  'vessels',
  'of',
  'my',
  'head',
  'and',
  'throat',
  'and',
  'which',
  'had',
  'hitherto',
  'buoyed',
  'up',
  'my',
  'spirits',
  'with',
  'madness',
  'and',
  'delirium',
  'had',
  'now',
  'begun',
  'to',
  'retire',
  'within',
  'their',
  'proper',
  'channels',
  'and',
  'the',
  'distinctness',
  'which',
  'was',
  'thus',
  'added',
  'to',
  'my',
  'perception',
  'of',
  'the',
  'danger',
  'merely',
  'served',
  'to',
  'deprive',
  'me',
  'of',
  'the',
  'selfpossession',
  'and',
  'courage',
  'to',
  'encounter',
  'it'],
 ['but',
  'this',
  'weakness',
  'was',
  'luckily',
  'for',
  'me',
  'of',
  'no',
  'very',
  'long',
  'duration'],
 ['in',
  'good',
  'time',
  'came',
  'to',
  'my',
  'rescue',
  'the',
  'spirit',
  'of',
  'despair',
  'and',
  'with',
  'frantic',
  'cries',
  'and',
  'struggles',
  'i',
  'jerked',
  'my',
  'way',
  'bodily',
  'upwards',
  'till',
  'at',
  'length',
  'clutching',
  'with',
  'a',
  'viselike',
  'grip',
  'the',
  'longdesired',
  'rim',
  'i',
  'writhed',
  'my',
  'person',
  'over',
  'it',
  'and',
  'fell',
  'headlong',
  'and',
  'shuddering',
  'within',
  'the',
  'car'],
 ['it',
  'was',
  'not',
  'until',
  'some',
  'time',
  'afterward',
  'that',
  'i',
  'recovered',
  'myself',
  'sufficiently',
  'to',
  'attend',
  'to',
  'the',
  'ordinary',
  'cares',
  'of',
  'the',
  'balloon'],
 ['i',
  'then',
  'however',
  'examined',
  'it',
  'with',
  'attention',
  'and',
  'found',
  'it',
  'to',
  'my',
  'great',
  'relief',
  'uninjured'],
 ['my',
  'implements',
  'were',
  'all',
  'safe',
  'and',
  'fortunately',
  'i',
  'had',
  'lost',
  'neither',
  'ballast',
  'nor',
  'provisions'],
 ['indeed',
  'i',
  'had',
  'so',
  'well',
  'secured',
  'them',
  'in',
  'their',
  'places',
  'that',
  'such',
  'an',
  'accident',
  'was',
  'entirely',
  'out',
  'of',
  'the',
  'question'],
 ['looking', 'at', 'my', 'watch', 'i', 'found', 'it', 'six', 'o', 'clock'],
 ['i',
  'was',
  'still',
  'rapidly',
  'ascending',
  'and',
  'my',
  'barometer',
  'gave',
  'a',
  'present',
  'altitude',
  'of',
  'three',
  'and',
  'threequarter',
  'miles'],
 ['immediately',
  'beneath',
  'me',
  'in',
  'the',
  'ocean',
  'lay',
  'a',
  'small',
  'black',
  'object',
  'slightly',
  'oblong',
  'in',
  'shape',
  'seemingly',
  'about',
  'the',
  'size',
  'and',
  'in',
  'every',
  'way',
  'bearing',
  'a',
  'great',
  'resemblance',
  'to',
  'one',
  'of',
  'those',
  'childish',
  'toys',
  'called',
  'a',
  'domino'],
 ['bringing',
  'my',
  'telescope',
  'to',
  'bear',
  'upon',
  'it',
  'i',
  'plainly',
  'discerned',
  'it',
  'to',
  'be',
  'a',
  'british',
  'ninety',
  'fourgun',
  'ship',
  'closehauled',
  'and',
  'pitching',
  'heavily',
  'in',
  'the',
  'sea',
  'with',
  'her',
  'head',
  'to',
  'the'],
 ['besides',
  'this',
  'ship',
  'i',
  'saw',
  'nothing',
  'but',
  'the',
  'ocean',
  'and',
  'the',
  'sky',
  'and',
  'the',
  'sun',
  'which',
  'had',
  'long',
  'arisen'],
 ['it',
  'is',
  'now',
  'high',
  'time',
  'that',
  'i',
  'should',
  'explain',
  'to',
  'your',
  'excellencies',
  'the',
  'object',
  'of',
  'my',
  'perilous',
  'voyage'],
 ['your',
  'excellencies',
  'will',
  'bear',
  'in',
  'mind',
  'that',
  'distressed',
  'circumstances',
  'in',
  'had',
  'at',
  'length',
  'driven',
  'me',
  'to',
  'the',
  'resolution',
  'of',
  'committing',
  'suicide'],
 ['it',
  'was',
  'not',
  'however',
  'that',
  'to',
  'life',
  'itself',
  'i',
  'had',
  'any',
  'positive',
  'disgust',
  'but',
  'that',
  'i',
  'was',
  'harassed',
  'beyond',
  'endurance',
  'by',
  'the',
  'adventitious',
  'miseries',
  'attending',
  'my',
  'situation'],
 ['in',
  'this',
  'state',
  'of',
  'mind',
  'wishing',
  'to',
  'live',
  'yet',
  'wearied',
  'with',
  'life',
  'the',
  'treatise',
  'at',
  'the',
  'stall',
  'of',
  'the',
  'bookseller',
  'opened',
  'a',
  'resource',
  'to',
  'my',
  'imagination'],
 ['i', 'then', 'finally', 'made', 'up', 'my', 'mind'],
 ['i',
  'determined',
  'to',
  'depart',
  'yet',
  'live',
  'to',
  'leave',
  'the',
  'world',
  'yet',
  'continue',
  'to',
  'exist',
  'in',
  'short',
  'to',
  'drop',
  'enigmas',
  'i',
  'resolved',
  'let',
  'what',
  'would',
  'ensue',
  'to',
  'force',
  'a',
  'passage',
  'if',
  'i',
  'could',
  'to',
  'the',
  'moon'],
 ['now',
  'lest',
  'i',
  'should',
  'be',
  'supposed',
  'more',
  'of',
  'a',
  'madman',
  'than',
  'i',
  'actually',
  'am',
  'i',
  'will',
  'detail',
  'as',
  'well',
  'as',
  'i',
  'am',
  'able',
  'the',
  'considerations',
  'which',
  'led',
  'me',
  'to',
  'believe',
  'that',
  'an',
  'achievement',
  'of',
  'this',
  'nature',
  'although',
  'without',
  'doubt',
  'difficult',
  'and',
  'incontestably',
  'full',
  'of',
  'danger',
  'was',
  'not',
  'absolutely',
  'to',
  'a',
  'bold',
  'spirit',
  'beyond',
  'the',
  'confines',
  'of',
  'the',
  'possible'],
 ['the',
  'moon',
  's',
  'actual',
  'distance',
  'from',
  'the',
  'earth',
  'was',
  'the',
  'first',
  'thing',
  'to',
  'be',
  'attended',
  'to'],
 ['now',
  'the',
  'mean',
  'or',
  'average',
  'interval',
  'between',
  'the',
  'centres',
  'of',
  'the',
  'two',
  'planets',
  'is',
  '599643',
  'of',
  'the',
  'earth',
  's',
  'equatorial',
  'radii',
  'or',
  'only',
  'about',
  '237000',
  'miles'],
 ['i', 'say', 'the', 'mean', 'or', 'average', 'interval'],
 ['but',
  'it',
  'must',
  'be',
  'borne',
  'in',
  'mind',
  'that',
  'the',
  'form',
  'of',
  'the',
  'moon',
  's',
  'orbit',
  'being',
  'an',
  'ellipse',
  'of',
  'eccentricity',
  'amounting',
  'to',
  'no',
  'less',
  'than',
  '005484',
  'of',
  'the',
  'major',
  'semiaxis',
  'of',
  'the',
  'ellipse',
  'itself',
  'and',
  'the',
  'earth',
  's',
  'centre',
  'being',
  'situated',
  'in',
  'its',
  'focus',
  'if',
  'i',
  'could',
  'in',
  'any',
  'manner',
  'contrive',
  'to',
  'meet',
  'the',
  'moon',
  'as',
  'it',
  'were',
  'in',
  'its',
  'perigee',
  'the',
  'above',
  'mentioned',
  'distance',
  'would',
  'be',
  'materially',
  'diminished'],
 ['but',
  'to',
  'say',
  'nothing',
  'at',
  'present',
  'of',
  'this',
  'possibility',
  'it',
  'was',
  'very',
  'certain',
  'that',
  'at',
  'all',
  'events',
  'from',
  'the',
  '237000',
  'miles',
  'i',
  'would',
  'have',
  'to',
  'deduct',
  'the',
  'radius',
  'of',
  'the',
  'earth',
  'say',
  '4000',
  'and',
  'the',
  'radius',
  'of',
  'the',
  'moon',
  'say',
  '1080',
  'in',
  'all',
  '5080',
  'leaving',
  'an',
  'actual',
  'interval',
  'to',
  'be',
  'traversed',
  'under',
  'average',
  'circumstances',
  'of',
  '231920',
  'miles'],
 ['now',
  'this',
  'i',
  'reflected',
  'was',
  'no',
  'very',
  'extraordinary',
  'distance'],
 ['travelling',
  'on',
  'land',
  'has',
  'been',
  'repeatedly',
  'accomplished',
  'at',
  'the',
  'rate',
  'of',
  'thirty',
  'miles',
  'per',
  'hour',
  'and',
  'indeed',
  'a',
  'much',
  'greater',
  'speed',
  'may',
  'be',
  'anticipated'],
 ['but',
  'even',
  'at',
  'this',
  'velocity',
  'it',
  'would',
  'take',
  'me',
  'no',
  'more',
  'than',
  '322',
  'days',
  'to',
  'reach',
  'the',
  'surface',
  'of',
  'the',
  'moon'],
 ['there',
  'were',
  'however',
  'many',
  'particulars',
  'inducing',
  'me',
  'to',
  'believe',
  'that',
  'my',
  'average',
  'rate',
  'of',
  'travelling',
  'might',
  'possibly',
  'very',
  'much',
  'exceed',
  'that',
  'of',
  'thirty',
  'miles',
  'per',
  'hour',
  'and',
  'as',
  'these',
  'considerations',
  'did',
  'not',
  'fail',
  'to',
  'make',
  'a',
  'deep',
  'impression',
  'upon',
  'my',
  'mind',
  'i',
  'will',
  'mention',
  'them',
  'more',
  'fully',
  'hereafter'],
 ['the',
  'next',
  'point',
  'to',
  'be',
  'regarded',
  'was',
  'a',
  'matter',
  'of',
  'far',
  'greater',
  'importance'],
 ['from',
  'indications',
  'afforded',
  'by',
  'the',
  'barometer',
  'we',
  'find',
  'that',
  'in',
  'ascensions',
  'from',
  'the',
  'surface',
  'of',
  'the',
  'earth',
  'we',
  'have',
  'at',
  'the',
  'height',
  'of',
  '1000',
  'feet',
  'left',
  'below',
  'us',
  'about',
  'onethirtieth',
  'of',
  'the',
  'entire',
  'mass',
  'of',
  'atmospheric',
  'air',
  'that',
  'at',
  '10600',
  'we',
  'have',
  'ascended',
  'through',
  'nearly',
  'onethird',
  'and',
  'that',
  'at',
  '18000',
  'which',
  'is',
  'not',
  'far',
  'from',
  'the',
  'elevation',
  'of',
  'we',
  'have',
  'surmounted',
  'onehalf',
  'the',
  'material',
  'or',
  'at',
  'all',
  'events',
  'onehalf',
  'the',
  'ponderable',
  'body',
  'of',
  'air',
  'incumbent',
  'upon',
  'our',
  'globe'],
 ['it',
  'is',
  'also',
  'calculated',
  'that',
  'at',
  'an',
  'altitude',
  'not',
  'exceeding',
  'the',
  'hundredth',
  'part',
  'of',
  'the',
  'earth',
  's',
  'diameter',
  'that',
  'is',
  'not',
  'exceeding',
  'eighty',
  'miles',
  'the',
  'rarefaction',
  'would',
  'be',
  'so',
  'excessive',
  'that',
  'animal',
  'life',
  'could',
  'in',
  'no',
  'manner',
  'be',
  'sustained',
  'and',
  'moreover',
  'that',
  'the',
  'most',
  'delicate',
  'means',
  'we',
  'possess',
  'of',
  'ascertaining',
  'the',
  'presence',
  'of',
  'the',
  'atmosphere',
  'would',
  'be',
  'inadequate',
  'to',
  'assure',
  'us',
  'of',
  'its',
  'existence'],
 ['but',
  'i',
  'did',
  'not',
  'fail',
  'to',
  'perceive',
  'that',
  'these',
  'latter',
  'calculations',
  'are',
  'founded',
  'altogether',
  'on',
  'our',
  'experimental',
  'knowledge',
  'of',
  'the',
  'properties',
  'of',
  'air',
  'and',
  'the',
  'mechanical',
  'laws',
  'regulating',
  'its',
  'dilation',
  'and',
  'compression',
  'in',
  'what',
  'may',
  'be',
  'called',
  'comparatively',
  'speaking',
  'the',
  'immediate',
  'vicinity',
  'of',
  'the',
  'earth',
  'itself',
  'and',
  'at',
  'the',
  'same',
  'time',
  'it',
  'is',
  'taken',
  'for',
  'granted',
  'that',
  'animal',
  'life',
  'is',
  'and',
  'must',
  'be',
  'essentially',
  'incapable',
  'of',
  'modification',
  'at',
  'any',
  'given',
  'unattainable',
  'distance',
  'from',
  'the',
  'surface'],
 ['now',
  'all',
  'such',
  'reasoning',
  'and',
  'from',
  'such',
  'data',
  'must',
  'of',
  'course',
  'be',
  'simply',
  'analogical'],
 ['the',
  'greatest',
  'height',
  'ever',
  'reached',
  'by',
  'man',
  'was',
  'that',
  'of',
  '25000',
  'feet',
  'attained',
  'in',
  'the',
  'aeronautic',
  'expedition',
  'of',
  'and'],
 ['this',
  'is',
  'a',
  'moderate',
  'altitude',
  'even',
  'when',
  'compared',
  'with',
  'the',
  'eighty',
  'miles',
  'in',
  'question',
  'and',
  'i',
  'could',
  'not',
  'help',
  'thinking',
  'that',
  'the',
  'subject',
  'admitted',
  'room',
  'for',
  'doubt',
  'and',
  'great',
  'latitude',
  'for',
  'speculation'],
 ['but',
  'in',
  'point',
  'of',
  'fact',
  'an',
  'ascension',
  'being',
  'made',
  'to',
  'any',
  'given',
  'altitude',
  'the',
  'ponderable',
  'quantity',
  'of',
  'air',
  'surmounted',
  'in',
  'any',
  'farther',
  'ascension',
  'is',
  'by',
  'no',
  'means',
  'in',
  'proportion',
  'to',
  'the',
  'additional',
  'height',
  'ascended',
  'as',
  'may',
  'be',
  'plainly',
  'seen',
  'from',
  'what',
  'has',
  'been',
  'stated',
  'before',
  'but',
  'in',
  'a',
  'ratio',
  'constantly',
  'decreasing'],
 ['it',
  'is',
  'therefore',
  'evident',
  'that',
  'ascend',
  'as',
  'high',
  'as',
  'we',
  'may',
  'we',
  'can',
  'not',
  'literally',
  'speaking',
  'arrive',
  'at',
  'a',
  'limit',
  'beyond',
  'which',
  'no',
  'atmosphere',
  'is',
  'to',
  'be',
  'found'],
 ['it',
  'must',
  'exist',
  'i',
  'argued',
  'although',
  'it',
  'may',
  'exist',
  'in',
  'a',
  'state',
  'of',
  'infinite',
  'rarefaction'],
 ['on',
  'the',
  'other',
  'hand',
  'i',
  'was',
  'aware',
  'that',
  'arguments',
  'have',
  'not',
  'been',
  'wanting',
  'to',
  'prove',
  'the',
  'existence',
  'of',
  'a',
  'real',
  'and',
  'definite',
  'limit',
  'to',
  'the',
  'atmosphere',
  'beyond',
  'which',
  'there',
  'is',
  'absolutely',
  'no',
  'air',
  'whatsoever'],
 ['but',
  'a',
  'circumstance',
  'which',
  'has',
  'been',
  'left',
  'out',
  'of',
  'view',
  'by',
  'those',
  'who',
  'contend',
  'for',
  'such',
  'a',
  'limit',
  'seemed',
  'to',
  'me',
  'although',
  'no',
  'positive',
  'refutation',
  'of',
  'their',
  'creed',
  'still',
  'a',
  'point',
  'worthy',
  'very',
  'serious',
  'investigation'],
 ['on',
  'comparing',
  'the',
  'intervals',
  'between',
  'the',
  'successive',
  'arrivals',
  'of',
  's',
  'comet',
  'at',
  'its',
  'perihelion',
  'after',
  'giving',
  'credit',
  'in',
  'the',
  'most',
  'exact',
  'manner',
  'for',
  'all',
  'the',
  'disturbances',
  'due',
  'to',
  'the',
  'attractions',
  'of',
  'the',
  'planets',
  'it',
  'appears',
  'that',
  'the',
  'periods',
  'are',
  'gradually',
  'diminishing',
  'that',
  'is',
  'to',
  'say',
  'the',
  'major',
  'axis',
  'of',
  'the',
  'comet',
  's',
  'ellipse',
  'is',
  'growing',
  'shorter',
  'in',
  'a',
  'slow',
  'but',
  'perfectly',
  'regular',
  'decrease'],
 ['now',
  'this',
  'is',
  'precisely',
  'what',
  'ought',
  'to',
  'be',
  'the',
  'case',
  'if',
  'we',
  'suppose',
  'a',
  'resistance',
  'experienced',
  'from',
  'the',
  'comet',
  'from',
  'an',
  'extremely',
  'rare',
  'ethereal',
  'medium',
  'pervading',
  'the',
  'regions',
  'of',
  'its',
  'orbit'],
 ['for',
  'it',
  'is',
  'evident',
  'that',
  'such',
  'a',
  'medium',
  'must',
  'in',
  'retarding',
  'the',
  'comet',
  's',
  'velocity',
  'increase',
  'its',
  'centripetal',
  'by',
  'weakening',
  'its',
  'centrifugal',
  'force'],
 ['in',
  'other',
  'words',
  'the',
  'sun',
  's',
  'attraction',
  'would',
  'be',
  'constantly',
  'attaining',
  'greater',
  'power',
  'and',
  'the',
  'comet',
  'would',
  'be',
  'drawn',
  'nearer',
  'at',
  'every',
  'revolution'],
 ['indeed',
  'there',
  'is',
  'no',
  'other',
  'way',
  'of',
  'accounting',
  'for',
  'the',
  'variation',
  'in',
  'question'],
 ['but', 'again'],
 ['the',
  'real',
  'diameter',
  'of',
  'the',
  'same',
  'comet',
  's',
  'nebulosity',
  'is',
  'observed',
  'to',
  'contract',
  'rapidly',
  'as',
  'it',
  'approaches',
  'the',
  'sun',
  'and',
  'dilate',
  'with',
  'equal',
  'rapidity',
  'in',
  'its',
  'departure',
  'towards',
  'its',
  'aphelion'],
 ['i',
  'not',
  'justifiable',
  'in',
  'supposing',
  'with',
  'that',
  'this',
  'apparent',
  'condensation',
  'of',
  'volume',
  'has',
  'its',
  'origin',
  'in',
  'the',
  'compression',
  'of',
  'the',
  'same',
  'ethereal',
  'medium',
  'i',
  'have',
  'spoken',
  'of',
  'before',
  'and',
  'which',
  'is',
  'only',
  'denser',
  'in',
  'proportion',
  'to',
  'its',
  'solar',
  'vicinity'],
 ['the',
  'lenticularshaped',
  'phenomenon',
  'also',
  'called',
  'the',
  'zodiacal',
  'light',
  'was',
  'a',
  'matter',
  'worthy',
  'of',
  'attention'],
 ['this',
  'radiance',
  'so',
  'apparent',
  'in',
  'the',
  'tropics',
  'and',
  'which',
  'can',
  'not',
  'be',
  'mistaken',
  'for',
  'any',
  'meteoric',
  'lustre',
  'extends',
  'from',
  'the',
  'horizon',
  'obliquely',
  'upward',
  'and',
  'follows',
  'generally',
  'the',
  'direction',
  'of',
  'the',
  'sun',
  's',
  'equator'],
 ['it',
  'appeared',
  'to',
  'me',
  'evidently',
  'in',
  'the',
  'nature',
  'of',
  'a',
  'rare',
  'atmosphere',
  'extending',
  'from',
  'the',
  'sun',
  'outward',
  'beyond',
  'the',
  'orbit',
  'of',
  'at',
  'least',
  'and',
  'i',
  'believed',
  'indefinitely',
  'farther'],
 ['2',
  'indeed',
  'this',
  'medium',
  'i',
  'could',
  'not',
  'suppose',
  'confined',
  'to',
  'the',
  'path',
  'of',
  'the',
  'comet',
  's',
  'ellipse',
  'or',
  'to',
  'the',
  'immediate',
  'neighborhood',
  'of',
  'the',
  'sun'],
 ['it',
  'was',
  'easy',
  'on',
  'the',
  'contrary',
  'to',
  'imagine',
  'it',
  'pervading',
  'the',
  'entire',
  'regions',
  'of',
  'our',
  'planetary',
  'system',
  'condensed',
  'into',
  'what',
  'we',
  'call',
  'atmosphere',
  'at',
  'the',
  'planets',
  'themselves',
  'and',
  'perhaps',
  'at',
  'some',
  'of',
  'them',
  'modified',
  'by',
  'considerations',
  'so',
  'to',
  'speak',
  'purely',
  'geological'],
 ['having',
  'adopted',
  'this',
  'view',
  'of',
  'the',
  'subject',
  'i',
  'had',
  'little',
  'further',
  'hesitation'],
 ['granting',
  'that',
  'on',
  'my',
  'passage',
  'i',
  'should',
  'meet',
  'with',
  'atmosphere',
  'essentially',
  'the',
  'same',
  'as',
  'at',
  'the',
  'surface',
  'of',
  'the',
  'earth',
  'i',
  'conceived',
  'that',
  'by',
  'means',
  'of',
  'the',
  'very',
  'ingenious',
  'apparatus',
  'of',
  'i',
  'should',
  'readily',
  'be',
  'enabled',
  'to',
  'condense',
  'it',
  'in',
  'sufficient',
  'quantity',
  'for',
  'the',
  'purposes',
  'of',
  'respiration'],
 ['this',
  'would',
  'remove',
  'the',
  'chief',
  'obstacle',
  'in',
  'a',
  'journey',
  'to',
  'the',
  'moon'],
 ['i',
  'had',
  'indeed',
  'spent',
  'some',
  'money',
  'and',
  'great',
  'labor',
  'in',
  'adapting',
  'the',
  'apparatus',
  'to',
  'the',
  'object',
  'intended',
  'and',
  'confidently',
  'looked',
  'forward',
  'to',
  'its',
  'successful',
  'application',
  'if',
  'i',
  'could',
  'manage',
  'to',
  'complete',
  'the',
  'voyage',
  'within',
  'any',
  'reasonable',
  'period'],
 ['this',
  'brings',
  'me',
  'back',
  'to',
  'the',
  'rate',
  'at',
  'which',
  'it',
  'might',
  'be',
  'possible',
  'to',
  'travel'],
 ['it',
  'is',
  'true',
  'that',
  'balloons',
  'in',
  'the',
  'first',
  'stage',
  'of',
  'their',
  'ascensions',
  'from',
  'the',
  'earth',
  'are',
  'known',
  'to',
  'rise',
  'with',
  'a',
  'velocity',
  'comparatively',
  'moderate'],
 ['now',
  'the',
  'power',
  'of',
  'elevation',
  'lies',
  'altogether',
  'in',
  'the',
  'superior',
  'lightness',
  'of',
  'the',
  'gas',
  'in',
  'the',
  'balloon',
  'compared',
  'with',
  'the',
  'atmospheric',
  'air',
  'and',
  'at',
  'first',
  'sight',
  'it',
  'does',
  'not',
  'appear',
  'probable',
  'that',
  'as',
  'the',
  'balloon',
  'acquires',
  'altitude',
  'and',
  'consequently',
  'arrives',
  'successively',
  'in',
  'atmospheric',
  'strata',
  'of',
  'densities',
  'rapidly',
  'diminishing',
  'i',
  'say',
  'it',
  'does',
  'not',
  'appear',
  'at',
  'all',
  'reasonable',
  'that',
  'in',
  'this',
  'its',
  'progress',
  'upwards',
  'the',
  'original',
  'velocity',
  'should',
  'be',
  'accelerated'],
 ['on',
  'the',
  'other',
  'hand',
  'i',
  'was',
  'not',
  'aware',
  'that',
  'in',
  'any',
  'recorded',
  'ascension',
  'a',
  'diminution',
  'was',
  'apparent',
  'in',
  'the',
  'absolute',
  'rate',
  'of',
  'ascent',
  'although',
  'such',
  'should',
  'have',
  'been',
  'the',
  'case',
  'if',
  'on',
  'account',
  'of',
  'nothing',
  'else',
  'on',
  'account',
  'of',
  'the',
  'escape',
  'of',
  'gas',
  'through',
  'balloons',
  'illconstructed',
  'and',
  'varnished',
  'with',
  'no',
  'better',
  'material',
  'than',
  'the',
  'ordinary',
  'varnish'],
 ['it',
  'seemed',
  'therefore',
  'that',
  'the',
  'effect',
  'of',
  'such',
  'escape',
  'was',
  'only',
  'sufficient',
  'to',
  'counterbalance',
  'the',
  'effect',
  'of',
  'some',
  'accelerating',
  'power'],
 ['i',
  'now',
  'considered',
  'that',
  'provided',
  'in',
  'my',
  'passage',
  'i',
  'found',
  'the',
  'medium',
  'i',
  'had',
  'imagined',
  'and',
  'provided',
  'that',
  'it',
  'should',
  'prove',
  'to',
  'be',
  'actually',
  'and',
  'essentially',
  'what',
  'we',
  'denominate',
  'atmospheric',
  'air',
  'it',
  'could',
  'make',
  'comparatively',
  'little',
  'difference',
  'at',
  'what',
  'extreme',
  'state',
  'of',
  'rarefaction',
  'i',
  'should',
  'discover',
  'it',
  'that',
  'is',
  'to',
  'say',
  'in',
  'regard',
  'to',
  'my',
  'power',
  'of',
  'ascending',
  'for',
  'the',
  'gas',
  'in',
  'the',
  'balloon',
  'would',
  'not',
  'only',
  'be',
  'itself',
  'subject',
  'to',
  'rarefaction',
  'partially',
  'similar',
  'in',
  'proportion',
  'to',
  'the',
  'occurrence',
  'of',
  'which',
  'i',
  'could',
  'suffer',
  'an',
  'escape',
  'of',
  'so',
  'much',
  'as',
  'would',
  'be',
  'requisite',
  'to',
  'prevent',
  'explosion',
  'but',
  'being',
  'what',
  'it',
  'was',
  'would',
  'at',
  'all',
  'events',
  'continue',
  'specifically',
  'lighter',
  'than',
  'any',
  'compound',
  'whatever',
  'of',
  'mere',
  'nitrogen',
  'and',
  'oxygen'],
 ['in',
  'the',
  'meantime',
  'the',
  'force',
  'of',
  'gravitation',
  'would',
  'be',
  'constantly',
  'diminishing',
  'in',
  'proportion',
  'to',
  'the',
  'squares',
  'of',
  'the',
  'distances',
  'and',
  'thus',
  'with',
  'a',
  'velocity',
  'prodigiously',
  'accelerating',
  'i',
  'should',
  'at',
  'length',
  'arrive',
  'in',
  'those',
  'distant',
  'regions',
  'where',
  'the',
  'force',
  'of',
  'the',
  'earth',
  's',
  'attraction',
  'would',
  'be',
  'superseded',
  'by',
  'that',
  'of',
  'the',
  'moon'],
 ['in',
  'accordance',
  'with',
  'these',
  'ideas',
  'i',
  'did',
  'not',
  'think',
  'it',
  'worth',
  'while',
  'to',
  'encumber',
  'myself',
  'with',
  'more',
  'provisions',
  'than',
  'would',
  'be',
  'sufficient',
  'for',
  'a',
  'period',
  'of',
  'forty',
  'days'],
 ['there',
  'was',
  'still',
  'however',
  'another',
  'difficulty',
  'which',
  'occasioned',
  'me',
  'some',
  'little',
  'disquietude'],
 ['it',
  'has',
  'been',
  'observed',
  'that',
  'in',
  'balloon',
  'ascensions',
  'to',
  'any',
  'considerable',
  'height',
  'besides',
  'the',
  'pain',
  'attending',
  'respiration',
  'great',
  'uneasiness',
  'is',
  'experienced',
  'about',
  'the',
  'head',
  'and',
  'body',
  'often',
  'accompanied',
  'with',
  'bleeding',
  'at',
  'the',
  'nose',
  'and',
  'other',
  'symptoms',
  'of',
  'an',
  'alarming',
  'kind',
  'and',
  'growing',
  'more',
  'and',
  'more',
  'inconvenient',
  'in',
  'proportion',
  'to',
  'the',
  'altitude',
  'attained'],
 ['3',
  'this',
  'was',
  'a',
  'reflection',
  'of',
  'a',
  'nature',
  'somewhat',
  'startling'],
 ['was',
  'it',
  'not',
  'probable',
  'that',
  'these',
  'symptoms',
  'would',
  'increase',
  'indefinitely',
  'or',
  'at',
  'least',
  'until',
  'terminated',
  'by',
  'death',
  'itself'],
 ['i', 'finally', 'thought', 'not'],
 ['their',
  'origin',
  'was',
  'to',
  'be',
  'looked',
  'for',
  'in',
  'the',
  'progressive',
  'removal',
  'of',
  'the',
  'customary',
  'atmospheric',
  'pressure',
  'upon',
  'the',
  'surface',
  'of',
  'the',
  'body',
  'and',
  'consequent',
  'distention',
  'of',
  'the',
  'superficial',
  'bloodvessels',
  'not',
  'in',
  'any',
  'positive',
  'disorganization',
  'of',
  'the',
  'animal',
  'system',
  'as',
  'in',
  'the',
  'case',
  'of',
  'difficulty',
  'in',
  'breathing',
  'where',
  'the',
  'atmospheric',
  'density',
  'is',
  'chemically',
  'insufficient',
  'for',
  'the',
  'due',
  'renovation',
  'of',
  'blood',
  'in',
  'a',
  'ventricle',
  'of',
  'the',
  'heart'],
 ['unless',
  'for',
  'default',
  'of',
  'this',
  'renovation',
  'i',
  'could',
  'see',
  'no',
  'reason',
  'therefore',
  'why',
  'life',
  'could',
  'not',
  'be',
  'sustained',
  'even',
  'in',
  'a',
  'vacuum',
  'for',
  'the',
  'expansion',
  'and',
  'compression',
  'of',
  'chest',
  'commonly',
  'called',
  'breathing',
  'is',
  'action',
  'purely',
  'muscular',
  'and',
  'the',
  'cause',
  'not',
  'the',
  'effect',
  'of',
  'respiration'],
 ['in',
  'a',
  'word',
  'i',
  'conceived',
  'that',
  'as',
  'the',
  'body',
  'should',
  'become',
  'habituated',
  'to',
  'the',
  'want',
  'of',
  'atmospheric',
  'pressure',
  'the',
  'sensations',
  'of',
  'pain',
  'would',
  'gradually',
  'diminish',
  'and',
  'to',
  'endure',
  'them',
  'while',
  'they',
  'continued',
  'i',
  'relied',
  'with',
  'confidence',
  'upon',
  'the',
  'iron',
  'hardihood',
  'of',
  'my',
  'constitution'],
 ['may',
  'it',
  'please',
  'your',
  'excellencies',
  'i',
  'have',
  'detailed',
  'some',
  'though',
  'by',
  'no',
  'means',
  'all',
  'the',
  'considerations',
  'which',
  'led',
  'me',
  'to',
  'form',
  'the',
  'project',
  'of',
  'a',
  'lunar',
  'voyage'],
 ['i',
  'shall',
  'now',
  'proceed',
  'to',
  'lay',
  'before',
  'you',
  'the',
  'result',
  'of',
  'an',
  'attempt',
  'so',
  'apparently',
  'audacious',
  'in',
  'conception',
  'and',
  'at',
  'all',
  'events',
  'so',
  'utterly',
  'unparalleled',
  'in',
  'the',
  'annals',
  'of',
  'mankind'],
 ['having',
  'attained',
  'the',
  'altitude',
  'before',
  'mentioned',
  'that',
  'is',
  'to',
  'say',
  'three',
  'miles',
  'and',
  'threequarters',
  'i',
  'threw',
  'out',
  'from',
  'the',
  'car',
  'a',
  'quantity',
  'of',
  'feathers',
  'and',
  'found',
  'that',
  'i',
  'still',
  'ascended',
  'with',
  'sufficient',
  'rapidity',
  'there',
  'was',
  'therefore',
  'no',
  'necessity',
  'for',
  'discharging',
  'any',
  'ballast'],
 ['i',
  'was',
  'glad',
  'of',
  'this',
  'for',
  'i',
  'wished',
  'to',
  'retain',
  'with',
  'me',
  'as',
  'much',
  'weight',
  'as',
  'i',
  'could',
  'carry',
  'for',
  'reasons',
  'which',
  'will',
  'be',
  'explained',
  'in',
  'the',
  'sequel'],
 ['i',
  'as',
  'yet',
  'suffered',
  'no',
  'bodily',
  'inconvenience',
  'breathing',
  'with',
  'great',
  'freedom',
  'and',
  'feeling',
  'no',
  'pain',
  'whatever',
  'in',
  'the',
  'head'],
 ['the',
  'cat',
  'was',
  'lying',
  'very',
  'demurely',
  'upon',
  'my',
  'coat',
  'which',
  'i',
  'had',
  'taken',
  'off',
  'and',
  'eyeing',
  'the',
  'pigeons',
  'with',
  'an',
  'air',
  'of',
  'nonchalance'],
 ['these',
  'latter',
  'being',
  'tied',
  'by',
  'the',
  'leg',
  'to',
  'prevent',
  'their',
  'escape',
  'were',
  'busily',
  'employed',
  'in',
  'picking',
  'up',
  'some',
  'grains',
  'of',
  'rice',
  'scattered',
  'for',
  'them',
  'in',
  'the',
  'bottom',
  'of',
  'the',
  'car'],
 ['at',
  'twenty',
  'minutes',
  'past',
  'six',
  'o',
  'clock',
  'the',
  'barometer',
  'showed',
  'an',
  'elevation',
  'of',
  '26400',
  'feet',
  'or',
  'five',
  'miles',
  'to',
  'a',
  'fraction'],
 ['the', 'prospect', 'seemed', 'unbounded'],
 ['indeed',
  'it',
  'is',
  'very',
  'easily',
  'calculated',
  'by',
  'means',
  'of',
  'spherical',
  'geometry',
  'what',
  'a',
  'great',
  'extent',
  'of',
  'the',
  'earth',
  's',
  'area',
  'i',
  'beheld'],
 ['the',
  'convex',
  'surface',
  'of',
  'any',
  'segment',
  'of',
  'a',
  'sphere',
  'is',
  'to',
  'the',
  'entire',
  'surface',
  'of',
  'the',
  'sphere',
  'itself',
  'as',
  'the',
  'versed',
  'sine',
  'of',
  'the',
  'segment',
  'to',
  'the',
  'diameter',
  'of',
  'the',
  'sphere'],
 ['now',
  'in',
  'my',
  'case',
  'the',
  'versed',
  'sine',
  'that',
  'is',
  'to',
  'say',
  'the',
  'thickness',
  'of',
  'the',
  'segment',
  'beneath',
  'me',
  'was',
  'about',
  'equal',
  'to',
  'my',
  'elevation',
  'or',
  'the',
  'elevation',
  'of',
  'the',
  'point',
  'of',
  'sight',
  'above',
  'the',
  'surface'],
 ['as',
  'five',
  'miles',
  'then',
  'to',
  'eight',
  'thousand',
  'would',
  'express',
  'the',
  'proportion',
  'of',
  'the',
  'earth',
  's',
  'area',
  'seen',
  'by',
  'me'],
 ['in',
  'other',
  'words',
  'i',
  'beheld',
  'as',
  'much',
  'as',
  'a',
  'sixteenhundredth',
  'part',
  'of',
  'the',
  'whole',
  'surface',
  'of',
  'the',
  'globe'],
 ['the',
  'sea',
  'appeared',
  'unruffled',
  'as',
  'a',
  'mirror',
  'although',
  'by',
  'means',
  'of',
  'the',
  'spyglass',
  'i',
  'could',
  'perceive',
  'it',
  'to',
  'be',
  'in',
  'a',
  'state',
  'of',
  'violent',
  'agitation'],
 ['the',
  'ship',
  'was',
  'no',
  'longer',
  'visible',
  'having',
  'drifted',
  'away',
  'apparently',
  'to',
  'the',
  'eastward'],
 ['i',
  'now',
  'began',
  'to',
  'experience',
  'at',
  'intervals',
  'severe',
  'pain',
  'in',
  'the',
  'head',
  'especially',
  'about',
  'the',
  'ears',
  'still',
  'however',
  'breathing',
  'with',
  'tolerable',
  'freedom'],
 ['the',
  'cat',
  'and',
  'pigeons',
  'seemed',
  'to',
  'suffer',
  'no',
  'inconvenience',
  'whatsoever'],
 ['at',
  'twenty',
  'minutes',
  'before',
  'seven',
  'the',
  'balloon',
  'entered',
  'a',
  'long',
  'series',
  'of',
  'dense',
  'cloud',
  'which',
  'put',
  'me',
  'to',
  'great',
  'trouble',
  'by',
  'damaging',
  'my',
  'condensing',
  'apparatus',
  'and',
  'wetting',
  'me',
  'to',
  'the',
  'skin'],
 ['this',
  'was',
  'to',
  'be',
  'sure',
  'a',
  'singular',
  'recontre',
  'for',
  'i',
  'had',
  'not',
  'believed',
  'it',
  'possible',
  'that',
  'a',
  'cloud',
  'of',
  'this',
  'nature',
  'could',
  'be',
  'sustained',
  'at',
  'so',
  'great',
  'an',
  'elevation'],
 ['i',
  'thought',
  'it',
  'best',
  'however',
  'to',
  'throw',
  'out',
  'two',
  'fivepound',
  'pieces',
  'of',
  'ballast',
  'reserving',
  'still',
  'a',
  'weight',
  'of',
  'one',
  'hundred',
  'and',
  'sixtyfive',
  'pounds'],
 ['upon',
  'so',
  'doing',
  'i',
  'soon',
  'rose',
  'above',
  'the',
  'difficulty',
  'and',
  'perceived',
  'immediately',
  'that',
  'i',
  'had',
  'obtained',
  'a',
  'great',
  'increase',
  'in',
  'my',
  'rate',
  'of',
  'ascent'],
 ['in',
  'a',
  'few',
  'seconds',
  'after',
  'my',
  'leaving',
  'the',
  'cloud',
  'a',
  'flash',
  'of',
  'vivid',
  'lightning',
  'shot',
  'from',
  'one',
  'end',
  'of',
  'it',
  'to',
  'the',
  'other',
  'and',
  'caused',
  'it',
  'to',
  'kindle',
  'up',
  'throughout',
  'its',
  'vast',
  'extent',
  'like',
  'a',
  'mass',
  'of',
  'ignited',
  'and',
  'glowing',
  'charcoal'],
 ['this',
  'it',
  'must',
  'be',
  'remembered',
  'was',
  'in',
  'the',
  'broad',
  'light',
  'of',
  'day'],
 ['no',
  'fancy',
  'may',
  'picture',
  'the',
  'sublimity',
  'which',
  'might',
  'have',
  'been',
  'exhibited',
  'by',
  'a',
  'similar',
  'phenomenon',
  'taking',
  'place',
  'amid',
  'the',
  'darkness',
  'of',
  'the',
  'night'],
 ['itself', 'might', 'have', 'been', 'found', 'a', 'fitting', 'image'],
 ['even',
  'as',
  'it',
  'was',
  'my',
  'hair',
  'stood',
  'on',
  'end',
  'while',
  'i',
  'gazed',
  'afar',
  'down',
  'within',
  'the',
  'yawning',
  'abysses',
  'letting',
  'imagination',
  'descend',
  'as',
  'it',
  'were',
  'and',
  'stalk',
  'about',
  'in',
  'the',
  'strange',
  'vaulted',
  'halls',
  'and',
  'ruddy',
  'gulfs',
  'and',
  'red',
  'ghastly',
  'chasms',
  'of',
  'the',
  'hideous',
  'and',
  'unfathomable',
  'fire'],
 ['i', 'had', 'indeed', 'made', 'a', 'narrow', 'escape'],
 ['had',
  'the',
  'balloon',
  'remained',
  'a',
  'very',
  'short',
  'while',
  'longer',
  'within',
  'the',
  'cloud',
  'that',
  'is',
  'to',
  'say',
  'had',
  'not',
  'the',
  'inconvenience',
  'of',
  'getting',
  'wet',
  'determined',
  'me',
  'to',
  'discharge',
  'the',
  'ballast',
  'inevitable',
  'ruin',
  'would',
  'have',
  'been',
  'the',
  'consequence'],
 ['such',
  'perils',
  'although',
  'little',
  'considered',
  'are',
  'perhaps',
  'the',
  'greatest',
  'which',
  'must',
  'be',
  'encountered',
  'in',
  'balloons'],
 ['i',
  'had',
  'by',
  'this',
  'time',
  'however',
  'attained',
  'too',
  'great',
  'an',
  'elevation',
  'to',
  'be',
  'any',
  'longer',
  'uneasy',
  'on',
  'this',
  'head'],
 ['i',
  'was',
  'now',
  'rising',
  'rapidly',
  'and',
  'by',
  'seven',
  'o',
  'clock',
  'the',
  'barometer',
  'indicated',
  'an',
  'altitude',
  'of',
  'no',
  'less',
  'than',
  'nine',
  'miles',
  'and',
  'a',
  'half'],
 ['i',
  'began',
  'to',
  'find',
  'great',
  'difficulty',
  'in',
  'drawing',
  'my',
  'breath'],
 ['my',
  'head',
  'too',
  'was',
  'excessively',
  'painful',
  'and',
  'having',
  'felt',
  'for',
  'some',
  'time',
  'a',
  'moisture',
  'about',
  'my',
  'cheeks',
  'i',
  'at',
  'length',
  'discovered',
  'it',
  'to',
  'be',
  'blood',
  'which',
  'was',
  'oozing',
  'quite',
  'fast',
  'from',
  'the',
  'drums',
  'of',
  'my',
  'ears'],
 ['my', 'eyes', 'also', 'gave', 'me', 'great', 'uneasiness'],
 ['upon',
  'passing',
  'the',
  'hand',
  'over',
  'them',
  'they',
  'seemed',
  'to',
  'have',
  'protruded',
  'from',
  'their',
  'sockets',
  'in',
  'no',
  'inconsiderable',
  'degree',
  'and',
  'all',
  'objects',
  'in',
  'the',
  'car',
  'and',
  'even',
  'the',
  'balloon',
  'itself',
  'appeared',
  'distorted',
  'to',
  'my',
  'vision'],
 ['these',
  'symptoms',
  'were',
  'more',
  'than',
  'i',
  'had',
  'expected',
  'and',
  'occasioned',
  'me',
  'some',
  'alarm'],
 ['at',
  'this',
  'juncture',
  'very',
  'imprudently',
  'and',
  'without',
  'consideration',
  'i',
  'threw',
  'out',
  'from',
  'the',
  'car',
  'three',
  'fivepound',
  'pieces',
  'of',
  'ballast'],
 ['the',
  'accelerated',
  'rate',
  'of',
  'ascent',
  'thus',
  'obtained',
  'carried',
  'me',
  'too',
  'rapidly',
  'and',
  'without',
  'sufficient',
  'gradation',
  'into',
  'a',
  'highly',
  'rarefied',
  'stratum',
  'of',
  'the',
  'atmosphere',
  'and',
  'the',
  'result',
  'had',
  'nearly',
  'proved',
  'fatal',
  'to',
  'my',
  'expedition',
  'and',
  'to',
  'myself'],
 ['i',
  'was',
  'suddenly',
  'seized',
  'with',
  'a',
  'spasm',
  'which',
  'lasted',
  'for',
  'more',
  'than',
  'five',
  'minutes',
  'and',
  'even',
  'when',
  'this',
  'in',
  'a',
  'measure',
  'ceased',
  'i',
  'could',
  'catch',
  'my',
  'breath',
  'only',
  'at',
  'long',
  'intervals',
  'and',
  'in',
  'a',
  'gasping',
  'manner',
  'bleeding',
  'all',
  'the',
  'while',
  'copiously',
  'at',
  'the',
  'nose',
  'and',
  'ears',
  'and',
  'even',
  'slightly',
  'at',
  'the',
  'eyes'],
 ['the',
  'pigeons',
  'appeared',
  'distressed',
  'in',
  'the',
  'extreme',
  'and',
  'struggled',
  'to',
  'escape',
  'while',
  'the',
  'cat',
  'mewed',
  'piteously',
  'and',
  'with',
  'her',
  'tongue',
  'hanging',
  'out',
  'of',
  'her',
  'mouth',
  'staggered',
  'to',
  'and',
  'fro',
  'in',
  'the',
  'car',
  'as',
  'if',
  'under',
  'the',
  'influence',
  'of',
  'poison'],
 ['i',
  'now',
  'too',
  'late',
  'discovered',
  'the',
  'great',
  'rashness',
  'of',
  'which',
  'i',
  'had',
  'been',
  'guilty',
  'in',
  'discharging',
  'the',
  'ballast',
  'and',
  'my',
  'agitation',
  'was',
  'excessive'],
 ['i',
  'anticipated',
  'nothing',
  'less',
  'than',
  'death',
  'and',
  'death',
  'in',
  'a',
  'few',
  'minutes'],
 ['the',
  'physical',
  'suffering',
  'i',
  'underwent',
  'contributed',
  'also',
  'to',
  'render',
  'me',
  'nearly',
  'incapable',
  'of',
  'making',
  'any',
  'exertion',
  'for',
  'the',
  'preservation',
  'of',
  'my',
  'life'],
 ['i',
  'had',
  'indeed',
  'little',
  'power',
  'of',
  'reflection',
  'left',
  'and',
  'the',
  'violence',
  'of',
  'the',
  'pain',
  'in',
  'my',
  'head',
  'seemed',
  'to',
  'be',
  'greatly',
  'on',
  'the',
  'increase'],
 ['thus',
  'i',
  'found',
  'that',
  'my',
  'senses',
  'would',
  'shortly',
  'give',
  'way',
  'altogether',
  'and',
  'i',
  'had',
  'already',
  'clutched',
  'one',
  'of',
  'the',
  'valve',
  'ropes',
  'with',
  'the',
  'view',
  'of',
  'attempting',
  'a',
  'descent',
  'when',
  'the',
  'recollection',
  'of',
  'the',
  'trick',
  'i',
  'had',
  'played',
  'the',
  'three',
  'creditors',
  'and',
  'the',
  'possible',
  'consequences',
  'to',
  'myself',
  'should',
  'i',
  'return',
  'operated',
  'to',
  'deter',
  'me',
  'for',
  'the',
  'moment'],
 ['i',
  'lay',
  'down',
  'in',
  'the',
  'bottom',
  'of',
  'the',
  'car',
  'and',
  'endeavored',
  'to',
  'collect',
  'my',
  'faculties'],
 ['in',
  'this',
  'i',
  'so',
  'far',
  'succeeded',
  'as',
  'to',
  'determine',
  'upon',
  'the',
  'experiment',
  'of',
  'losing',
  'blood'],
 ['having',
  'no',
  'lancet',
  'however',
  'i',
  'was',
  'constrained',
  'to',
  'perform',
  'the',
  'operation',
  'in',
  'the',
  'best',
  'manner',
  'i',
  'was',
  'able',
  'and',
  'finally',
  'succeeded',
  'in',
  'opening',
  'a',
  'vein',
  'in',
  'my',
  'right',
  'arm',
  'with',
  'the',
  'blade',
  'of',
  'my',
  'penknife'],
 ['the',
  'blood',
  'had',
  'hardly',
  'commenced',
  'flowing',
  'when',
  'i',
  'experienced',
  'a',
  'sensible',
  'relief',
  'and',
  'by',
  'the',
  'time',
  'i',
  'had',
  'lost',
  'about',
  'half',
  'a',
  'moderate',
  'basin',
  'full',
  'most',
  'of',
  'the',
  'worst',
  'symptoms',
  'had',
  'abandoned',
  'me',
  'entirely'],
 ['i',
  'nevertheless',
  'did',
  'not',
  'think',
  'it',
  'expedient',
  'to',
  'attempt',
  'getting',
  'on',
  'my',
  'feet',
  'immediately',
  'but',
  'having',
  'tied',
  'up',
  'my',
  'arm',
  'as',
  'well',
  'as',
  'i',
  'could',
  'i',
  'lay',
  'still',
  'for',
  'about',
  'a',
  'quarter',
  'of',
  'an',
  'hour'],
 ['at',
  'the',
  'end',
  'of',
  'this',
  'time',
  'i',
  'arose',
  'and',
  'found',
  'myself',
  'freer',
  'from',
  'absolute',
  'pain',
  'of',
  'any',
  'kind',
  'than',
  'i',
  'had',
  'been',
  'during',
  'the',
  'last',
  'hour',
  'and',
  'a',
  'quarter',
  'of',
  'my',
  'ascension'],
 ['the',
  'difficulty',
  'of',
  'breathing',
  'however',
  'was',
  'diminished',
  'in',
  'a',
  'very',
  'slight',
  'degree',
  'and',
  'i',
  'found',
  'that',
  'it',
  'would',
  'soon',
  'be',
  'positively',
  'necessary',
  'to',
  'make',
  'use',
  'of',
  'my',
  'condenser'],
 ['in',
  'the',
  'meantime',
  'looking',
  'toward',
  'the',
  'cat',
  'who',
  'was',
  'again',
  'snugly',
  'stowed',
  'away',
  'upon',
  'my',
  'coat',
  'i',
  'discovered',
  'to',
  'my',
  'infinite',
  'surprise',
  'that',
  'she',
  'had',
  'taken',
  'the',
  'opportunity',
  'of',
  'my',
  'indisposition',
  'to',
  'bring',
  'into',
  'light',
  'a',
  'litter',
  'of',
  'three',
  'little',
  'kittens'],
 ['this',
  'was',
  'an',
  'addition',
  'to',
  'the',
  'number',
  'of',
  'passengers',
  'on',
  'my',
  'part',
  'altogether',
  'unexpected',
  'but',
  'i',
  'was',
  'pleased',
  'at',
  'the',
  'occurrence'],
 ['it',
  'would',
  'afford',
  'me',
  'a',
  'chance',
  'of',
  'bringing',
  'to',
  'a',
  'kind',
  'of',
  'test',
  'the',
  'truth',
  'of',
  'a',
  'surmise',
  'which',
  'more',
  'than',
  'anything',
  'else',
  'had',
  'influenced',
  'me',
  'in',
  'attempting',
  'this',
  'ascension'],
 ['i',
  'had',
  'imagined',
  'that',
  'the',
  'habitual',
  'endurance',
  'of',
  'the',
  'atmospheric',
  'pressure',
  'at',
  'the',
  'surface',
  'of',
  'the',
  'earth',
  'was',
  'the',
  'cause',
  'or',
  'nearly',
  'so',
  'of',
  'the',
  'pain',
  'attending',
  'animal',
  'existence',
  'at',
  'a',
  'distance',
  'above',
  'the',
  'surface'],
 ['should',
  'the',
  'kittens',
  'be',
  'found',
  'to',
  'suffer',
  'uneasiness',
  'in',
  'an',
  'equal',
  'degree',
  'with',
  'their',
  'mother',
  'i',
  'must',
  'consider',
  'my',
  'theory',
  'in',
  'fault',
  'but',
  'a',
  'failure',
  'to',
  'do',
  'so',
  'i',
  'should',
  'look',
  'upon',
  'as',
  'a',
  'strong',
  'confirmation',
  'of',
  'my',
  'idea'],
 ['by',
  'eight',
  'o',
  'clock',
  'i',
  'had',
  'actually',
  'attained',
  'an',
  'elevation',
  'of',
  'seventeen',
  'miles',
  'above',
  'the',
  'surface',
  'of',
  'the',
  'earth'],
 ['thus',
  'it',
  'seemed',
  'to',
  'me',
  'evident',
  'that',
  'my',
  'rate',
  'of',
  'ascent',
  'was',
  'not',
  'only',
  'on',
  'the',
  'increase',
  'but',
  'that',
  'the',
  'progression',
  'would',
  'have',
  'been',
  'apparent',
  'in',
  'a',
  'slight',
  'degree',
  'even',
  'had',
  'i',
  'not',
  'discharged',
  'the',
  'ballast',
  'which',
  'i',
  'did'],
 ['the',
  'pains',
  'in',
  'my',
  'head',
  'and',
  'ears',
  'returned',
  'at',
  'intervals',
  'with',
  'violence',
  'and',
  'i',
  'still',
  'continued',
  'to',
  'bleed',
  'occasionally',
  'at',
  'the',
  'nose',
  'but',
  'upon',
  'the',
  'whole',
  'i',
  'suffered',
  'much',
  'less',
  'than',
  'might',
  'have',
  'been',
  'expected'],
 ['i',
  'breathed',
  'however',
  'at',
  'every',
  'moment',
  'with',
  'more',
  'and',
  'more',
  'difficulty',
  'and',
  'each',
  'inhalation',
  'was',
  'attended',
  'with',
  'a',
  'troublesome',
  'spasmodic',
  'action',
  'of',
  'the',
  'chest'],
 ['i',
  'now',
  'unpacked',
  'the',
  'condensing',
  'apparatus',
  'and',
  'got',
  'it',
  'ready',
  'for',
  'immediate',
  'use'],
 ['the',
  'view',
  'of',
  'the',
  'earth',
  'at',
  'this',
  'period',
  'of',
  'my',
  'ascension',
  'was',
  'beautiful',
  'indeed'],
 ['to',
  'the',
  'westward',
  'the',
  'northward',
  'and',
  'the',
  'southward',
  'as',
  'far',
  'as',
  'i',
  'could',
  'see',
  'lay',
  'a',
  'boundless',
  'sheet',
  'of',
  'apparently',
  'unruffled',
  'ocean',
  'which',
  'every',
  'moment',
  'gained',
  'a',
  'deeper',
  'and',
  'a',
  'deeper',
  'tint',
  'of',
  'blue',
  'and',
  'began',
  'already',
  'to',
  'assume',
  'a',
  'slight',
  'appearance',
  'of',
  'convexity'],
 ['at',
  'a',
  'vast',
  'distance',
  'to',
  'the',
  'eastward',
  'although',
  'perfectly',
  'discernible',
  'extended',
  'the',
  'islands',
  'of',
  'the',
  'entire',
  'coasts',
  'of',
  'and',
  'with',
  'a',
  'small',
  'portion',
  'of',
  'the',
  'northern',
  'part',
  'of',
  'the',
  'continent',
  'of'],
 ['of',
  'individual',
  'edifices',
  'not',
  'a',
  'trace',
  'could',
  'be',
  'discovered',
  'and',
  'the',
  'proudest',
  'cities',
  'of',
  'mankind',
  'had',
  'utterly',
  'faded',
  'away',
  'from',
  'the',
  'face',
  'of',
  'the',
  'earth'],
 ['from',
  'the',
  'rock',
  'of',
  'now',
  'dwindled',
  'into',
  'a',
  'dim',
  'speck',
  'the',
  'dark',
  'sea',
  'dotted',
  'with',
  'shining',
  'islands',
  'as',
  'the',
  'heaven',
  'is',
  'dotted',
  'with',
  'stars',
  'spread',
  'itself',
  'out',
  'to',
  'the',
  'eastward',
  'as',
  'far',
  'as',
  'my',
  'vision',
  'extended',
  'until',
  'its',
  'entire',
  'mass',
  'of',
  'waters',
  'seemed',
  'at',
  'length',
  'to',
  'tumble',
  'headlong',
  'over',
  'the',
  'abyss',
  'of',
  'the',
  'horizon',
  'and',
  'i',
  'found',
  'myself',
  'listening',
  'on',
  'tiptoe',
  'for',
  'the',
  'echoes',
  'of',
  'the',
  'mighty',
  'cataract'],
 ['overhead',
  'the',
  'sky',
  'was',
  'of',
  'a',
  'jetty',
  'black',
  'and',
  'the',
  'stars',
  'were',
  'brilliantly',
  'visible'],
 ['the',
  'pigeons',
  'about',
  'this',
  'time',
  'seeming',
  'to',
  'undergo',
  'much',
  'suffering',
  'i',
  'determined',
  'upon',
  'giving',
  'them',
  'their',
  'liberty'],
 ['i',
  'first',
  'untied',
  'one',
  'of',
  'them',
  'a',
  'beautiful',
  'graymottled',
  'pigeon',
  'and',
  'placed',
  'him',
  'upon',
  'the',
  'rim',
  'of',
  'the',
  'wickerwork'],
 ['he',
  'appeared',
  'extremely',
  'uneasy',
  'looking',
  'anxiously',
  'around',
  'him',
  'fluttering',
  'his',
  'wings',
  'and',
  'making',
  'a',
  'loud',
  'cooing',
  'noise',
  'but',
  'could',
  'not',
  'be',
  'persuaded',
  'to',
  'trust',
  'himself',
  'from',
  'off',
  'the',
  'car'],
 ['i',
  'took',
  'him',
  'up',
  'at',
  'last',
  'and',
  'threw',
  'him',
  'to',
  'about',
  'half',
  'a',
  'dozen',
  'yards',
  'from',
  'the',
  'balloon'],
 ['he',
  'made',
  'however',
  'no',
  'attempt',
  'to',
  'descend',
  'as',
  'i',
  'had',
  'expected',
  'but',
  'struggled',
  'with',
  'great',
  'vehemence',
  'to',
  'get',
  'back',
  'uttering',
  'at',
  'the',
  'same',
  'time',
  'very',
  'shrill',
  'and',
  'piercing',
  'cries'],
 ['he',
  'at',
  'length',
  'succeeded',
  'in',
  'regaining',
  'his',
  'former',
  'station',
  'on',
  'the',
  'rim',
  'but',
  'had',
  'hardly',
  'done',
  'so',
  'when',
  'his',
  'head',
  'dropped',
  'upon',
  'his',
  'breast',
  'and',
  'he',
  'fell',
  'dead',
  'within',
  'the',
  'car'],
 ['the', 'other', 'one', 'did', 'not', 'prove', 'so', 'unfortunate'],
 ['to',
  'prevent',
  'his',
  'following',
  'the',
  'example',
  'of',
  'his',
  'companion',
  'and',
  'accomplishing',
  'a',
  'return',
  'i',
  'threw',
  'him',
  'downward',
  'with',
  'all',
  'my',
  'force',
  'and',
  'was',
  'pleased',
  'to',
  'find',
  'him',
  'continue',
  'his',
  'descent',
  'with',
  'great',
  'velocity',
  'making',
  'use',
  'of',
  'his',
  'wings',
  'with',
  'ease',
  'and',
  'in',
  'a',
  'perfectly',
  'natural',
  'manner'],
 ['in',
  'a',
  'very',
  'short',
  'time',
  'he',
  'was',
  'out',
  'of',
  'sight',
  'and',
  'i',
  'have',
  'no',
  'doubt',
  'he',
  'reached',
  'home',
  'in',
  'safety'],
 ['who',
  'seemed',
  'in',
  'a',
  'great',
  'measure',
  'recovered',
  'from',
  'her',
  'illness',
  'now',
  'made',
  'a',
  'hearty',
  'meal',
  'of',
  'the',
  'dead',
  'bird',
  'and',
  'then',
  'went',
  'to',
  'sleep',
  'with',
  'much',
  'apparent',
  'satisfaction'],
 ['her',
  'kittens',
  'were',
  'quite',
  'lively',
  'and',
  'so',
  'far',
  'evinced',
  'not',
  'the',
  'slightest',
  'sign',
  'of',
  'any',
  'uneasiness',
  'whatever'],
 ['at',
  'a',
  'quarterpast',
  'eight',
  'being',
  'no',
  'longer',
  'able',
  'to',
  'draw',
  'breath',
  'without',
  'the',
  'most',
  'intolerable',
  'pain',
  'i',
  'proceeded',
  'forthwith',
  'to',
  'adjust',
  'around',
  'the',
  'car',
  'the',
  'apparatus',
  'belonging',
  'to',
  'the',
  'condenser'],
 ['this',
  'apparatus',
  'will',
  'require',
  'some',
  'little',
  'explanation',
  'and',
  'your',
  'excellencies',
  'will',
  'please',
  'to',
  'bear',
  'in',
  'mind',
  'that',
  'my',
  'object',
  'in',
  'the',
  'first',
  'place',
  'was',
  'to',
  'surround',
  'myself',
  'and',
  'cat',
  'entirely',
  'with',
  'a',
  'barricade',
  'against',
  'the',
  'highly',
  'rarefied',
  'atmosphere',
  'in',
  'which',
  'i',
  'was',
  'existing',
  'with',
  'the',
  'intention',
  'of',
  'introducing',
  'within',
  'this',
  'barricade',
  'by',
  'means',
  'of',
  'my',
  'condenser',
  'a',
  'quantity',
  'of',
  'this',
  'same',
  'atmosphere',
  'sufficiently',
  'condensed',
  'for',
  'the',
  'purposes',
  'of',
  'respiration'],
 ['with',
  'this',
  'object',
  'in',
  'view',
  'i',
  'had',
  'prepared',
  'a',
  'very',
  'strong',
  'perfectly',
  'airtight',
  'but',
  'flexible',
  'gumelastic',
  'bag'],
 ['in',
  'this',
  'bag',
  'which',
  'was',
  'of',
  'sufficient',
  'dimensions',
  'the',
  'entire',
  'car',
  'was',
  'in',
  'a',
  'manner',
  'placed'],
 ['that',
  'is',
  'to',
  'say',
  'it',
  'the',
  'bag',
  'was',
  'drawn',
  'over',
  'the',
  'whole',
  'bottom',
  'of',
  'the',
  'car',
  'up',
  'its',
  'sides',
  'and',
  'so',
  'on',
  'along',
  'the',
  'outside',
  'of',
  'the',
  'ropes',
  'to',
  'the',
  'upper',
  'rim',
  'or',
  'hoop',
  'where',
  'the',
  'network',
  'is',
  'attached'],
 ['having',
  'pulled',
  'the',
  'bag',
  'up',
  'in',
  'this',
  'way',
  'and',
  'formed',
  'a',
  'complete',
  'enclosure',
  'on',
  'all',
  'sides',
  'and',
  'at',
  'bottom',
  'it',
  'was',
  'now',
  'necessary',
  'to',
  'fasten',
  'up',
  'its',
  'top',
  'or',
  'mouth',
  'by',
  'passing',
  'its',
  'material',
  'over',
  'the',
  'hoop',
  'of',
  'the',
  'network',
  'in',
  'other',
  'words',
  'between',
  'the',
  'network',
  'and',
  'the',
  'hoop'],
 ['but',
  'if',
  'the',
  'network',
  'were',
  'separated',
  'from',
  'the',
  'hoop',
  'to',
  'admit',
  'this',
  'passage',
  'what',
  'was',
  'to',
  'sustain',
  'the',
  'car',
  'in',
  'the',
  'meantime'],
 ['now',
  'the',
  'network',
  'was',
  'not',
  'permanently',
  'fastened',
  'to',
  'the',
  'hoop',
  'but',
  'attached',
  'by',
  'a',
  'series',
  'of',
  'running',
  'loops',
  'or',
  'nooses'],
 ['i',
  'therefore',
  'undid',
  'only',
  'a',
  'few',
  'of',
  'these',
  'loops',
  'at',
  'one',
  'time',
  'leaving',
  'the',
  'car',
  'suspended',
  'by',
  'the',
  'remainder'],
 ['having',
  'thus',
  'inserted',
  'a',
  'portion',
  'of',
  'the',
  'cloth',
  'forming',
  'the',
  'upper',
  'part',
  'of',
  'the',
  'bag',
  'i',
  'refastened',
  'the',
  'loops',
  'not',
  'to',
  'the',
  'hoop',
  'for',
  'that',
  'would',
  'have',
  'been',
  'impossible',
  'since',
  'the',
  'cloth',
  'now',
  'intervened',
  'but',
  'to',
  'a',
  'series',
  'of',
  'large',
  'buttons',
  'affixed',
  'to',
  'the',
  'cloth',
  'itself',
  'about',
  'three',
  'feet',
  'below',
  'the',
  'mouth',
  'of',
  'the',
  'bag',
  'the',
  'intervals',
  'between',
  'the',
  'buttons',
  'having',
  'been',
  'made',
  'to',
  'correspond',
  'to',
  'the',
  'intervals',
  'between',
  'the',
  'loops'],
 ['this',
  'done',
  'a',
  'few',
  'more',
  'of',
  'the',
  'loops',
  'were',
  'unfastened',
  'from',
  'the',
  'rim',
  'a',
  'farther',
  'portion',
  'of',
  'the',
  'cloth',
  'introduced',
  'and',
  'the',
  'disengaged',
  'loops',
  'then',
  'connected',
  'with',
  'their',
  'proper',
  'buttons'],
 ['in',
  'this',
  'way',
  'it',
  'was',
  'possible',
  'to',
  'insert',
  'the',
  'whole',
  'upper',
  'part',
  'of',
  'the',
  'bag',
  'between',
  'the',
  'network',
  'and',
  'the',
  'hoop'],
 ['it',
  'is',
  'evident',
  'that',
  'the',
  'hoop',
  'would',
  'now',
  'drop',
  'down',
  'within',
  'the',
  'car',
  'while',
  'the',
  'whole',
  'weight',
  'of',
  'the',
  'car',
  'itself',
  'with',
  'all',
  'its',
  'contents',
  'would',
  'be',
  'held',
  'up',
  'merely',
  'by',
  'the',
  'strength',
  'of',
  'the',
  'buttons'],
 ['this',
  'at',
  'first',
  'sight',
  'would',
  'seem',
  'an',
  'inadequate',
  'dependence',
  'but',
  'it',
  'was',
  'by',
  'no',
  'means',
  'so',
  'for',
  'the',
  'buttons',
  'were',
  'not',
  'only',
  'very',
  'strong',
  'in',
  'themselves',
  'but',
  'so',
  'close',
  'together',
  'that',
  'a',
  'very',
  'slight',
  'portion',
  'of',
  'the',
  'whole',
  'weight',
  'was',
  'supported',
  'by',
  'any',
  'one',
  'of',
  'them'],
 ['indeed',
  'had',
  'the',
  'car',
  'and',
  'contents',
  'been',
  'three',
  'times',
  'heavier',
  'than',
  'they',
  'were',
  'i',
  'should',
  'not',
  'have',
  'been',
  'at',
  'all',
  'uneasy'],
 ['i',
  'now',
  'raised',
  'up',
  'the',
  'hoop',
  'again',
  'within',
  'the',
  'covering',
  'of',
  'gumelastic',
  'and',
  'propped',
  'it',
  'at',
  'nearly',
  'its',
  'former',
  'height',
  'by',
  'means',
  'of',
  'three',
  'light',
  'poles',
  'prepared',
  'for',
  'the',
  'occasion'],
 ['this',
  'was',
  'done',
  'of',
  'course',
  'to',
  'keep',
  'the',
  'bag',
  'distended',
  'at',
  'the',
  'top',
  'and',
  'to',
  'preserve',
  'the',
  'lower',
  'part',
  'of',
  'the',
  'network',
  'in',
  'its',
  'proper',
  'situation'],
 ['all',
  'that',
  'now',
  'remained',
  'was',
  'to',
  'fasten',
  'up',
  'the',
  'mouth',
  'of',
  'the',
  'enclosure',
  'and',
  'this',
  'was',
  'readily',
  'accomplished',
  'by',
  'gathering',
  'the',
  'folds',
  'of',
  'the',
  'material',
  'together',
  'and',
  'twisting',
  'them',
  'up',
  'very',
  'tightly',
  'on',
  'the',
  'inside',
  'by',
  'means',
  'of',
  'a',
  'kind',
  'of',
  'stationary',
  'tourniquet'],
 ['in',
  'the',
  'sides',
  'of',
  'the',
  'covering',
  'thus',
  'adjusted',
  'round',
  'the',
  'car',
  'had',
  'been',
  'inserted',
  'three',
  'circular',
  'panes',
  'of',
  'thick',
  'but',
  'clear',
  'glass',
  'through',
  'which',
  'i',
  'could',
  'see',
  'without',
  'difficulty',
  'around',
  'me',
  'in',
  'every',
  'horizontal',
  'direction'],
 ['in',
  'that',
  'portion',
  'of',
  'the',
  'cloth',
  'forming',
  'the',
  'bottom',
  'was',
  'likewise',
  'a',
  'fourth',
  'window',
  'of',
  'the',
  'same',
  'kind',
  'and',
  'corresponding',
  'with',
  'a',
  'small',
  'aperture',
  'in',
  'the',
  'floor',
  'of',
  'the',
  'car',
  'itself'],
 ['this',
  'enabled',
  'me',
  'to',
  'see',
  'perpendicularly',
  'down',
  'but',
  'having',
  'found',
  'it',
  'impossible',
  'to',
  'place',
  'any',
  'similar',
  'contrivance',
  'overhead',
  'on',
  'account',
  'of',
  'the',
  'peculiar',
  'manner',
  'of',
  'closing',
  'up',
  'the',
  'opening',
  'there',
  'and',
  'the',
  'consequent',
  'wrinkles',
  'in',
  'the',
  'cloth',
  'i',
  'could',
  'expect',
  'to',
  'see',
  'no',
  'objects',
  'situated',
  'directly',
  'in',
  'my',
  'zenith'],
 ['this',
  'of',
  'course',
  'was',
  'a',
  'matter',
  'of',
  'little',
  'consequence',
  'for',
  'had',
  'i',
  'even',
  'been',
  'able',
  'to',
  'place',
  'a',
  'window',
  'at',
  'top',
  'the',
  'balloon',
  'itself',
  'would',
  'have',
  'prevented',
  'my',
  'making',
  'any',
  'use',
  'of',
  'it'],
 ['about',
  'a',
  'foot',
  'below',
  'one',
  'of',
  'the',
  'side',
  'windows',
  'was',
  'a',
  'circular',
  'opening',
  'eight',
  'inches',
  'in',
  'diameter',
  'and',
  'fitted',
  'with',
  'a',
  'brass',
  'rim',
  'adapted',
  'in',
  'its',
  'inner',
  'edge',
  'to',
  'the',
  'windings',
  'of',
  'a',
  'screw'],
 ['in',
  'this',
  'rim',
  'was',
  'screwed',
  'the',
  'large',
  'tube',
  'of',
  'the',
  'condenser',
  'the',
  'body',
  'of',
  'the',
  'machine',
  'being',
  'of',
  'course',
  'within',
  'the',
  'chamber',
  'of',
  'gumelastic'],
 ['through',
  'this',
  'tube',
  'a',
  'quantity',
  'of',
  'the',
  'rare',
  'atmosphere',
  'circumjacent',
  'being',
  'drawn',
  'by',
  'means',
  'of',
  'a',
  'vacuum',
  'created',
  'in',
  'the',
  'body',
  'of',
  'the',
  'machine',
  'was',
  'thence',
  'discharged',
  'in',
  'a',
  'state',
  'of',
  'condensation',
  'to',
  'mingle',
  'with',
  'the',
  'thin',
  'air',
  'already',
  'in',
  'the',
  'chamber'],
 ['this',
  'operation',
  'being',
  'repeated',
  'several',
  'times',
  'at',
  'length',
  'filled',
  'the',
  'chamber',
  'with',
  'atmosphere',
  'proper',
  'for',
  'all',
  'the',
  'purposes',
  'of',
  'respiration'],
 ['but',
  'in',
  'so',
  'confined',
  'a',
  'space',
  'it',
  'would',
  'in',
  'a',
  'short',
  'time',
  'necessarily',
  'become',
  'foul',
  'and',
  'unfit',
  'for',
  'use',
  'from',
  'frequent',
  'contact',
  'with',
  'the',
  'lungs'],
 ['it',
  'was',
  'then',
  'ejected',
  'by',
  'a',
  'small',
  'valve',
  'at',
  'the',
  'bottom',
  'of',
  'the',
  'car',
  'the',
  'dense',
  'air',
  'readily',
  'sinking',
  'into',
  'the',
  'thinner',
  'atmosphere',
  'below'],
 ['to',
  'avoid',
  'the',
  'inconvenience',
  'of',
  'making',
  'a',
  'total',
  'vacuum',
  'at',
  'any',
  'moment',
  'within',
  'the',
  'chamber',
  'this',
  'purification',
  'was',
  'never',
  'accomplished',
  'all',
  'at',
  'once',
  'but',
  'in',
  'a',
  'gradual',
  'manner',
  'the',
  'valve',
  'being',
  'opened',
  'only',
  'for',
  'a',
  'few',
  'seconds',
  'then',
  'closed',
  'again',
  'until',
  'one',
  'or',
  'two',
  'strokes',
  'from',
  'the',
  'pump',
  'of',
  'the',
  'condenser',
  'had',
  'supplied',
  'the',
  'place',
  'of',
  'the',
  'atmosphere',
  'ejected'],
 ['for',
  'the',
  'sake',
  'of',
  'experiment',
  'i',
  'had',
  'put',
  'the',
  'cat',
  'and',
  'kittens',
  'in',
  'a',
  'small',
  'basket',
  'and',
  'suspended',
  'it',
  'outside',
  'the',
  'car',
  'to',
  'a',
  'button',
  'at',
  'the',
  'bottom',
  'close',
  'by',
  'the',
  'valve',
  'through',
  'which',
  'i',
  'could',
  'feed',
  'them',
  'at',
  'any',
  'moment',
  'when',
  'necessary'],
 ['i',
  'did',
  'this',
  'at',
  'some',
  'little',
  'risk',
  'and',
  'before',
  'closing',
  'the',
  'mouth',
  'of',
  'the',
  'chamber',
  'by',
  'reaching',
  'under',
  'the',
  'car',
  'with',
  'one',
  'of',
  'the',
  'poles',
  'before',
  'mentioned',
  'to',
  'which',
  'a',
  'hook',
  'had',
  'been',
  'attached'],
 ['by',
  'the',
  'time',
  'i',
  'had',
  'fully',
  'completed',
  'these',
  'arrangements',
  'and',
  'filled',
  'the',
  'chamber',
  'as',
  'explained',
  'it',
  'wanted',
  'only',
  'ten',
  'minutes',
  'of',
  'nine',
  'o',
  'clock'],
 ['during',
  'the',
  'whole',
  'period',
  'of',
  'my',
  'being',
  'thus',
  'employed',
  'i',
  'endured',
  'the',
  'most',
  'terrible',
  'distress',
  'from',
  'difficulty',
  'of',
  'respiration',
  'and',
  'bitterly',
  'did',
  'i',
  'repent',
  'the',
  'negligence',
  'or',
  'rather',
  'foolhardiness',
  'of',
  'which',
  'i',
  'had',
  'been',
  'guilty',
  'of',
  'putting',
  'off',
  'to',
  'the',
  'last',
  'moment',
  'a',
  'matter',
  'of',
  'so',
  'much',
  'importance'],
 ['but',
  'having',
  'at',
  'length',
  'accomplished',
  'it',
  'i',
  'soon',
  'began',
  'to',
  'reap',
  'the',
  'benefit',
  'of',
  'my',
  'invention'],
 ['once',
  'again',
  'i',
  'breathed',
  'with',
  'perfect',
  'freedom',
  'and',
  'ease',
  'and',
  'indeed',
  'why',
  'should',
  'i',
  'not'],
 ['i',
  'was',
  'also',
  'agreeably',
  'surprised',
  'to',
  'find',
  'myself',
  'in',
  'a',
  'great',
  'measure',
  'relieved',
  'from',
  'the',
  'violent',
  'pains',
  'which',
  'had',
  'hitherto',
  'tormented',
  'me'],
 ['a',
  'slight',
  'headache',
  'accompanied',
  'with',
  'a',
  'sensation',
  'of',
  'fulness',
  'or',
  'distention',
  'about',
  'the',
  'wrists',
  'the',
  'ankles',
  'and',
  'the',
  'throat',
  'was',
  'nearly',
  'all',
  'of',
  'which',
  'i',
  'had',
  'now',
  'to',
  'complain'],
 ['thus',
  'it',
  'seemed',
  'evident',
  'that',
  'a',
  'greater',
  'part',
  'of',
  'the',
  'uneasiness',
  'attending',
  'the',
  'removal',
  'of',
  'atmospheric',
  'pressure',
  'had',
  'actually',
  'worn',
  'off',
  'as',
  'i',
  'had',
  'expected',
  'and',
  'that',
  'much',
  'of',
  'the',
  'pain',
  'endured',
  'for',
  'the',
  'last',
  'two',
  'hours',
  'should',
  'have',
  'been',
  'attributed',
  'altogether',
  'to',
  'the',
  'effects',
  'of',
  'a',
  'deficient',
  'respiration'],
 ['at',
  'twenty',
  'minutes',
  'before',
  'nine',
  'o',
  'clock',
  'that',
  'is',
  'to',
  'say',
  'a',
  'short',
  'time',
  'prior',
  'to',
  'my',
  'closing',
  'up',
  'the',
  'mouth',
  'of',
  'the',
  'chamber',
  'the',
  'mercury',
  'attained',
  'its',
  'limit',
  'or',
  'ran',
  'down',
  'in',
  'the',
  'barometer',
  'which',
  'as',
  'i',
  'mentioned',
  'before',
  'was',
  'one',
  'of',
  'an',
  'extended',
  'construction'],
 ['it',
  'then',
  'indicated',
  'an',
  'altitude',
  'on',
  'my',
  'part',
  'of',
  '132000',
  'feet',
  'or',
  'fiveandtwenty',
  'miles',
  'and',
  'i',
  'consequently',
  'surveyed',
  'at',
  'that',
  'time',
  'an',
  'extent',
  'of',
  'the',
  'earth',
  's',
  'area',
  'amounting',
  'to',
  'no',
  'less',
  'than',
  'the',
  'three',
  'hundredandtwentieth',
  'part',
  'of',
  'its',
  'entire',
  'superficies'],
 ['at',
  'nine',
  'o',
  'clock',
  'i',
  'had',
  'again',
  'lost',
  'sight',
  'of',
  'land',
  'to',
  'the',
  'eastward',
  'but',
  'not',
  'before',
  'i',
  'became',
  'aware',
  'that',
  'the',
  'balloon',
  'was',
  'drifting',
  'rapidly',
  'to',
  'the',
  'the',
  'convexity',
  'of',
  'the',
  'ocean',
  'beneath',
  'me',
  'was',
  'very',
  'evident',
  'indeed',
  'although',
  'my',
  'view',
  'was',
  'often',
  'interrupted',
  'by',
  'the',
  'masses',
  'of',
  'cloud',
  'which',
  'floated',
  'to',
  'and',
  'fro'],
 ['i',
  'observed',
  'now',
  'that',
  'even',
  'the',
  'lightest',
  'vapors',
  'never',
  'rose',
  'to',
  'more',
  'than',
  'ten',
  'miles',
  'above',
  'the',
  'level',
  'of',
  'the',
  'sea'],
 ['at',
  'half',
  'past',
  'nine',
  'i',
  'tried',
  'the',
  'experiment',
  'of',
  'throwing',
  'out',
  'a',
  'handful',
  'of',
  'feathers',
  'through',
  'the',
  'valve'],
 ['they',
  'did',
  'not',
  'float',
  'as',
  'i',
  'had',
  'expected',
  'but',
  'dropped',
  'down',
  'perpendicularly',
  'like',
  'a',
  'bullet',
  'en',
  'masse',
  'and',
  'with',
  'the',
  'greatest',
  'velocity',
  'being',
  'out',
  'of',
  'sight',
  'in',
  'a',
  'very',
  'few',
  'seconds'],
 ['i',
  'did',
  'not',
  'at',
  'first',
  'know',
  'what',
  'to',
  'make',
  'of',
  'this',
  'extraordinary',
  'phenomenon',
  'not',
  'being',
  'able',
  'to',
  'believe',
  'that',
  'my',
  'rate',
  'of',
  'ascent',
  'had',
  'of',
  'a',
  'sudden',
  'met',
  'with',
  'so',
  'prodigious',
  'an',
  'acceleration'],
 ['but',
  'it',
  'soon',
  'occurred',
  'to',
  'me',
  'that',
  'the',
  'atmosphere',
  'was',
  'now',
  'far',
  'too',
  'rare',
  'to',
  'sustain',
  'even',
  'the',
  'feathers',
  'that',
  'they',
  'actually',
  'fell',
  'as',
  'they',
  'appeared',
  'to',
  'do',
  'with',
  'great',
  'rapidity',
  'and',
  'that',
  'i',
  'had',
  'been',
  'surprised',
  'by',
  'the',
  'united',
  'velocities',
  'of',
  'their',
  'descent',
  'and',
  'my',
  'own',
  'elevation'],
 ['by',
  'ten',
  'o',
  'clock',
  'i',
  'found',
  'that',
  'i',
  'had',
  'very',
  'little',
  'to',
  'occupy',
  'my',
  'immediate',
  'attention'],
 ['went',
  'swimmingly',
  'and',
  'i',
  'believed',
  'the',
  'balloon',
  'to',
  'be',
  'going',
  'upward',
  'with',
  'a',
  'speed',
  'increasing',
  'momently',
  'although',
  'i',
  'had',
  'no',
  'longer',
  'any',
  'means',
  'of',
  'ascertaining',
  'the',
  'progression',
  'of',
  'the',
  'increase'],
 ['i',
  'suffered',
  'no',
  'pain',
  'or',
  'uneasiness',
  'of',
  'any',
  'kind',
  'and',
  'enjoyed',
  'better',
  'spirits',
  'than',
  'i',
  'had',
  'at',
  'any',
  'period',
  'since',
  'my',
  'departure',
  'from',
  'busying',
  'myself',
  'now',
  'in',
  'examining',
  'the',
  'state',
  'of',
  'my',
  'various',
  'apparatus',
  'and',
  'now',
  'in',
  'regenerating',
  'the',
  'atmosphere',
  'within',
  'the',
  'chamber'],
 ['this',
  'latter',
  'point',
  'i',
  'determined',
  'to',
  'attend',
  'to',
  'at',
  'regular',
  'intervals',
  'of',
  'forty',
  'minutes',
  'more',
  'on',
  'account',
  'of',
  'the',
  'preservation',
  'of',
  'my',
  'health',
  'than',
  'from',
  'so',
  'frequent',
  'a',
  'renovation',
  'being',
  'absolutely',
  'necessary'],
 ['in',
  'the',
  'meanwhile',
  'i',
  'could',
  'not',
  'help',
  'making',
  'anticipations'],
 ['revelled',
  'in',
  'the',
  'wild',
  'and',
  'dreamy',
  'regions',
  'of',
  'the',
  'moon'],
 ['imagination',
  'feeling',
  'herself',
  'for',
  'once',
  'unshackled',
  'roamed',
  'at',
  'will',
  'among',
  'the',
  'everchanging',
  'wonders',
  'of',
  'a',
  'shadowy',
  'and',
  'unstable',
  'land'],
 ['now',
  'there',
  'were',
  'hoary',
  'and',
  'timehonored',
  'forests',
  'and',
  'craggy',
  'precipices',
  'and',
  'waterfalls',
  'tumbling',
  'with',
  'a',
  'loud',
  'noise',
  'into',
  'abysses',
  'without',
  'a',
  'bottom'],
 ['then',
  'i',
  'came',
  'suddenly',
  'into',
  'still',
  'noonday',
  'solitudes',
  'where',
  'no',
  'wind',
  'of',
  'heaven',
  'ever',
  'intruded',
  'and',
  'where',
  'vast',
  'meadows',
  'of',
  'poppies',
  'and',
  'slender',
  'lilylooking',
  'flowers',
  'spread',
  'themselves',
  'out',
  'a',
  'weary',
  'distance',
  'all',
  'silent',
  'and',
  'motionless',
  'forever'],
 ['then',
  'again',
  'i',
  'journeyed',
  'far',
  'down',
  'away',
  'into',
  'another',
  'country',
  'where',
  'it',
  'was',
  'all',
  'one',
  'dim',
  'and',
  'vague',
  'lake',
  'with',
  'a',
  'boundary',
  'line',
  'of',
  'clouds'],
 ['and',
  'out',
  'of',
  'this',
  'melancholy',
  'water',
  'arose',
  'a',
  'forest',
  'of',
  'tall',
  'eastern',
  'trees',
  'like',
  'a',
  'wilderness',
  'of',
  'dreams'],
 ['and',
  'i',
  'have',
  'in',
  'mind',
  'that',
  'the',
  'shadows',
  'of',
  'the',
  'trees',
  'which',
  'fell',
  'upon',
  'the',
  'lake',
  'remained',
  'not',
  'on',
  'the',
  'surface',
  'where',
  'they',
  'fell',
  'but',
  'sunk',
  'slowly',
  'and',
  'steadily',
  'down',
  'and',
  'commingled',
  'with',
  'the',
  'waves',
  'while',
  'from',
  'the',
  'trunks',
  'of',
  'the',
  'trees',
  'other',
  'shadows',
  'were',
  'continually',
  'coming',
  'out',
  'and',
  'taking',
  'the',
  'place',
  'of',
  'their',
  'brothers',
  'thus',
  'entombed'],
 ['this',
  'then',
  'i',
  'said',
  'thoughtfully',
  'is',
  'the',
  'very',
  'reason',
  'why',
  'the',
  'waters',
  'of',
  'this',
  'lake',
  'grow',
  'blacker',
  'with',
  'age',
  'and',
  'more',
  'melancholy',
  'as',
  'the',
  'hours',
  'run',
  'on',
  'but',
  'fancies',
  'such',
  'as',
  'these',
  'were',
  'not',
  'the',
  'sole',
  'possessors',
  'of',
  'my',
  'brain'],
 ['horrors',
  'of',
  'a',
  'nature',
  'most',
  'stern',
  'and',
  'most',
  'appalling',
  'would',
  'too',
  'frequently',
  'obtrude',
  'themselves',
  'upon',
  'my',
  'mind',
  'and',
  'shake',
  'the',
  'innermost',
  'depths',
  'of',
  'my',
  'soul',
  'with',
  'the',
  'bare',
  'supposition',
  'of',
  'their',
  'possibility'],
 ['yet',
  'i',
  'would',
  'not',
  'suffer',
  'my',
  'thoughts',
  'for',
  'any',
  'length',
  'of',
  'time',
  'to',
  'dwell',
  'upon',
  'these',
  'latter',
  'speculations',
  'rightly',
  'judging',
  'the',
  'real',
  'and',
  'palpable',
  'dangers',
  'of',
  'the',
  'voyage',
  'sufficient',
  'for',
  'my',
  'undivided',
  'attention'],
 ['at',
  'five',
  'o',
  'clock',
  'pm',
  'being',
  'engaged',
  'in',
  'regenerating',
  'the',
  'atmosphere',
  'within',
  'the',
  'chamber',
  'i',
  'took',
  'that',
  'opportunity',
  'of',
  'observing',
  'the',
  'cat',
  'and',
  'kittens',
  'through',
  'the',
  'valve'],
 ['the',
  'cat',
  'herself',
  'appeared',
  'to',
  'suffer',
  'again',
  'very',
  'much',
  'and',
  'i',
  'had',
  'no',
  'hesitation',
  'in',
  'attributing',
  'her',
  'uneasiness',
  'chiefly',
  'to',
  'a',
  'difficulty',
  'in',
  'breathing',
  'but',
  'my',
  'experiment',
  'with',
  'the',
  'kittens',
  'had',
  'resulted',
  'very',
  'strangely'],
 ['i',
  'had',
  'expected',
  'of',
  'course',
  'to',
  'see',
  'them',
  'betray',
  'a',
  'sense',
  'of',
  'pain',
  'although',
  'in',
  'a',
  'less',
  'degree',
  'than',
  'their',
  'mother',
  'and',
  'this',
  'would',
  'have',
  'been',
  'sufficient',
  'to',
  'confirm',
  'my',
  'opinion',
  'concerning',
  'the',
  'habitual',
  'endurance',
  'of',
  'atmospheric',
  'pressure'],
 ['but',
  'i',
  'was',
  'not',
  'prepared',
  'to',
  'find',
  'them',
  'upon',
  'close',
  'examination',
  'evidently',
  'enjoying',
  'a',
  'high',
  'degree',
  'of',
  'health',
  'breathing',
  'with',
  'the',
  'greatest',
  'ease',
  'and',
  'perfect',
  'regularity',
  'and',
  'evincing',
  'not',
  'the',
  'slightest',
  'sign',
  'of',
  'any',
  'uneasiness',
  'whatever'],
 ['i',
  'could',
  'only',
  'account',
  'for',
  'all',
  'this',
  'by',
  'extending',
  'my',
  'theory',
  'and',
  'supposing',
  'that',
  'the',
  'highly',
  'rarefied',
  'atmosphere',
  'around',
  'might',
  'perhaps',
  'not',
  'be',
  'as',
  'i',
  'had',
  'taken',
  'for',
  'granted',
  'chemically',
  'insufficient',
  'for',
  'the',
  'purposes',
  'of',
  'life',
  'and',
  'that',
  'a',
  'person',
  'born',
  'in',
  'such',
  'a',
  'medium',
  'might',
  'possibly',
  'be',
  'unaware',
  'of',
  'any',
  'inconvenience',
  'attending',
  'its',
  'inhalation',
  'while',
  'upon',
  'removal',
  'to',
  'the',
  'denser',
  'strata',
  'near',
  'the',
  'earth',
  'he',
  'might',
  'endure',
  'tortures',
  'of',
  'a',
  'similar',
  'nature',
  'to',
  'those',
  'i',
  'had',
  'so',
  'lately',
  'experienced'],
 ['it',
  'has',
  'since',
  'been',
  'to',
  'me',
  'a',
  'matter',
  'of',
  'deep',
  'regret',
  'that',
  'an',
  'awkward',
  'accident',
  'at',
  'this',
  'time',
  'occasioned',
  'me',
  'the',
  'loss',
  'of',
  'my',
  'little',
  'family',
  'of',
  'cats',
  'and',
  'deprived',
  'me',
  'of',
  'the',
  'insight',
  'into',
  'this',
  'matter',
  'which',
  'a',
  'continued',
  'experiment',
  'might',
  'have',
  'afforded'],
 ['in',
  'passing',
  'my',
  'hand',
  'through',
  'the',
  'valve',
  'with',
  'a',
  'cup',
  'of',
  'water',
  'for',
  'the',
  'old',
  'puss',
  'the',
  'sleeves',
  'of',
  'my',
  'shirt',
  'became',
  'entangled',
  'in',
  'the',
  'loop',
  'which',
  'sustained',
  'the',
  'basket',
  'and',
  'thus',
  'in',
  'a',
  'moment',
  'loosened',
  'it',
  'from',
  'the',
  'bottom'],
 ['had',
  'the',
  'whole',
  'actually',
  'vanished',
  'into',
  'air',
  'it',
  'could',
  'not',
  'have',
  'shot',
  'from',
  'my',
  'sight',
  'in',
  'a',
  'more',
  'abrupt',
  'and',
  'instantaneous',
  'manner'],
 ['positively',
  'there',
  'could',
  'not',
  'have',
  'intervened',
  'the',
  'tenth',
  'part',
  'of',
  'a',
  'second',
  'between',
  'the',
  'disengagement',
  'of',
  'the',
  'basket',
  'and',
  'its',
  'absolute',
  'and',
  'total',
  'disappearance',
  'with',
  'all',
  'that',
  'it',
  'contained'],
 ['my',
  'good',
  'wishes',
  'followed',
  'it',
  'to',
  'the',
  'earth',
  'but',
  'of',
  'course',
  'i',
  'had',
  'no',
  'hope',
  'that',
  'either',
  'cat',
  'or',
  'kittens',
  'would',
  'ever',
  'live',
  'to',
  'tell',
  'the',
  'tale',
  'of',
  'their',
  'misfortune'],
 ['at',
  'six',
  'o',
  'clock',
  'i',
  'perceived',
  'a',
  'great',
  'portion',
  'of',
  'the',
  'earth',
  's',
  'visible',
  'area',
  'to',
  'the',
  'eastward',
  'involved',
  'in',
  'thick',
  'shadow',
  'which',
  'continued',
  'to',
  'advance',
  'with',
  'great',
  'rapidity',
  'until',
  'at',
  'five',
  'minutes',
  'before',
  'seven',
  'the',
  'whole',
  'surface',
  'in',
  'view',
  'was',
  'enveloped',
  'in',
  'the',
  'darkness',
  'of',
  'night'],
 ['it',
  'was',
  'not',
  'however',
  'until',
  'long',
  'after',
  'this',
  'time',
  'that',
  'the',
  'rays',
  'of',
  'the',
  'setting',
  'sun',
  'ceased',
  'to',
  'illumine',
  'the',
  'balloon',
  'and',
  'this',
  'circumstance',
  'although',
  'of',
  'course',
  'fully',
  'anticipated',
  'did',
  'not',
  'fail',
  'to',
  'give',
  'me',
  'an',
  'infinite',
  'deal',
  'of',
  'pleasure'],
 ['it',
  'was',
  'evident',
  'that',
  'in',
  'the',
  'morning',
  'i',
  'should',
  'behold',
  'the',
  'rising',
  'luminary',
  'many',
  'hours',
  'at',
  'least',
  'before',
  'the',
  'citizens',
  'of',
  'in',
  'spite',
  'of',
  'their',
  'situation',
  'so',
  'much',
  'farther',
  'to',
  'the',
  'eastward',
  'and',
  'thus',
  'day',
  'after',
  'day',
  'in',
  'proportion',
  'to',
  'the',
  'height',
  'ascended',
  'would',
  'i',
  'enjoy',
  'the',
  'light',
  'of',
  'the',
  'sun',
  'for',
  'a',
  'longer',
  'and',
  'a',
  'longer',
  'period'],
 ['i',
  'now',
  'determined',
  'to',
  'keep',
  'a',
  'journal',
  'of',
  'my',
  'passage',
  'reckoning',
  'the',
  'days',
  'from',
  'one',
  'to',
  'twentyfour',
  'hours',
  'continuously',
  'without',
  'taking',
  'into',
  'consideration',
  'the',
  'intervals',
  'of',
  'darkness'],
 ['at',
  'ten',
  'o',
  'clock',
  'feeling',
  'sleepy',
  'i',
  'determined',
  'to',
  'lie',
  'down',
  'for',
  'the',
  'rest',
  'of',
  'the',
  'night',
  'but',
  'here',
  'a',
  'difficulty',
  'presented',
  'itself',
  'which',
  'obvious',
  'as',
  'it',
  'may',
  'appear',
  'had',
  'escaped',
  'my',
  'attention',
  'up',
  'to',
  'the',
  'very',
  'moment',
  'of',
  'which',
  'i',
  'am',
  'now',
  'speaking'],
 ['if',
  'i',
  'went',
  'to',
  'sleep',
  'as',
  'i',
  'proposed',
  'how',
  'could',
  'the',
  'atmosphere',
  'in',
  'the',
  'chamber',
  'be',
  'regenerated',
  'in',
  'the',
  'interim'],
 ['to',
  'breathe',
  'it',
  'for',
  'more',
  'than',
  'an',
  'hour',
  'at',
  'the',
  'farthest',
  'would',
  'be',
  'a',
  'matter',
  'of',
  'impossibility',
  'or',
  'if',
  'even',
  'this',
  'term',
  'could',
  'be',
  'extended',
  'to',
  'an',
  'hour',
  'and',
  'a',
  'quarter',
  'the',
  'most',
  'ruinous',
  'consequences',
  'might',
  'ensue'],
 ['the',
  'consideration',
  'of',
  'this',
  'dilemma',
  'gave',
  'me',
  'no',
  'little',
  'disquietude',
  'and',
  'it',
  'will',
  'hardly',
  'be',
  'believed',
  'that',
  'after',
  'the',
  'dangers',
  'i',
  'had',
  'undergone',
  'i',
  'should',
  'look',
  'upon',
  'this',
  'business',
  'in',
  'so',
  'serious',
  'a',
  'light',
  'as',
  'to',
  'give',
  'up',
  'all',
  'hope',
  'of',
  'accomplishing',
  'my',
  'ultimate',
  'design',
  'and',
  'finally',
  'make',
  'up',
  'my',
  'mind',
  'to',
  'the',
  'necessity',
  'of',
  'a',
  'descent'],
 ['but', 'this', 'hesitation', 'was', 'only', 'momentary'],
 ['i',
  'reflected',
  'that',
  'man',
  'is',
  'the',
  'veriest',
  'slave',
  'of',
  'custom',
  'and',
  'that',
  'many',
  'points',
  'in',
  'the',
  'routine',
  'of',
  'his',
  'existence',
  'are',
  'deemed',
  'essentially',
  'important',
  'which',
  'are',
  'only',
  'so',
  'at',
  'all',
  'by',
  'his',
  'having',
  'rendered',
  'them',
  'habitual'],
 ['it',
  'was',
  'very',
  'certain',
  'that',
  'i',
  'could',
  'not',
  'do',
  'without',
  'sleep',
  'but',
  'i',
  'might',
  'easily',
  'bring',
  'myself',
  'to',
  'feel',
  'no',
  'inconvenience',
  'from',
  'being',
  'awakened',
  'at',
  'intervals',
  'of',
  'an',
  'hour',
  'during',
  'the',
  'whole',
  'period',
  'of',
  'my',
  'repose'],
 ['it',
  'would',
  'require',
  'but',
  'five',
  'minutes',
  'at',
  'most',
  'to',
  'regenerate',
  'the',
  'atmosphere',
  'in',
  'the',
  'fullest',
  'manner',
  'and',
  'the',
  'only',
  'real',
  'difficulty',
  'was',
  'to',
  'contrive',
  'a',
  'method',
  'of',
  'arousing',
  'myself',
  'at',
  'the',
  'proper',
  'moment',
  'for',
  'so',
  'doing'],
 ['but',
  'this',
  'was',
  'a',
  'question',
  'which',
  'i',
  'am',
  'willing',
  'to',
  'confess',
  'occasioned',
  'me',
  'no',
  'little',
  'trouble',
  'in',
  'its',
  'solution'],
 ['to',
  'be',
  'sure',
  'i',
  'had',
  'heard',
  'of',
  'the',
  'student',
  'who',
  'to',
  'prevent',
  'his',
  'falling',
  'asleep',
  'over',
  'his',
  'books',
  'held',
  'in',
  'one',
  'hand',
  'a',
  'ball',
  'of',
  'copper',
  'the',
  'din',
  'of',
  'whose',
  'descent',
  'into',
  'a',
  'basin',
  'of',
  'the',
  'same',
  'metal',
  'on',
  'the',
  'floor',
  'beside',
  'his',
  'chair',
  'served',
  'effectually',
  'to',
  'startle',
  'him',
  'up',
  'if',
  'at',
  'any',
  'moment',
  'he',
  'should',
  'be',
  'overcome',
  'with',
  'drowsiness'],
 ['my',
  'own',
  'case',
  'however',
  'was',
  'very',
  'different',
  'indeed',
  'and',
  'left',
  'me',
  'no',
  'room',
  'for',
  'any',
  'similar',
  'idea',
  'for',
  'i',
  'did',
  'not',
  'wish',
  'to',
  'keep',
  'awake',
  'but',
  'to',
  'be',
  'aroused',
  'from',
  'slumber',
  'at',
  'regular',
  'intervals',
  'of',
  'time'],
 ['i',
  'at',
  'length',
  'hit',
  'upon',
  'the',
  'following',
  'expedient',
  'which',
  'simple',
  'as',
  'it',
  'may',
  'seem',
  'was',
  'hailed',
  'by',
  'me',
  'at',
  'the',
  'moment',
  'of',
  'discovery',
  'as',
  'an',
  'invention',
  'fully',
  'equal',
  'to',
  'that',
  'of',
  'the',
  'telescope',
  'the',
  'steamengine',
  'or',
  'the',
  'art',
  'of',
  'printing',
  'itself'],
 ['it',
  'is',
  'necessary',
  'to',
  'premise',
  'that',
  'the',
  'balloon',
  'at',
  'the',
  'elevation',
  'now',
  'attained',
  'continued',
  'its',
  'course',
  'upward',
  'with',
  'an',
  'even',
  'and',
  'undeviating',
  'ascent',
  'and',
  'the',
  'car',
  'consequently',
  'followed',
  'with',
  'a',
  'steadiness',
  'so',
  'perfect',
  'that',
  'it',
  'would',
  'have',
  'been',
  'impossible',
  'to',
  'detect',
  'in',
  'it',
  'the',
  'slightest',
  'vacillation',
  'whatever'],
 ['this',
  'circumstance',
  'favored',
  'me',
  'greatly',
  'in',
  'the',
  'project',
  'i',
  'now',
  'determined',
  'to',
  'adopt'],
 ['my',
  'supply',
  'of',
  'water',
  'had',
  'been',
  'put',
  'on',
  'board',
  'in',
  'kegs',
  'containing',
  'five',
  'gallons',
  'each',
  'and',
  'ranged',
  'very',
  'securely',
  'around',
  'the',
  'interior',
  'of',
  'the',
  'car'],
 ['i',
  'unfastened',
  'one',
  'of',
  'these',
  'and',
  'taking',
  'two',
  'ropes',
  'tied',
  'them',
  'tightly',
  'across',
  'the',
  'rim',
  'of',
  'the',
  'wickerwork',
  'from',
  'one',
  'side',
  'to',
  'the',
  'other',
  'placing',
  'them',
  'about',
  'a',
  'foot',
  'apart',
  'and',
  'parallel',
  'so',
  'as',
  'to',
  'form',
  'a',
  'kind',
  'of',
  'shelf',
  'upon',
  'which',
  'i',
  'placed',
  'the',
  'keg',
  'and',
  'steadied',
  'it',
  'in',
  'a',
  'horizontal',
  'position'],
 ['about',
  'eight',
  'inches',
  'immediately',
  'below',
  'these',
  'ropes',
  'and',
  'four',
  'feet',
  'from',
  'the',
  'bottom',
  'of',
  'the',
  'car',
  'i',
  'fastened',
  'another',
  'shelf',
  'but',
  'made',
  'of',
  'thin',
  'plank',
  'being',
  'the',
  'only',
  'similar',
  'piece',
  'of',
  'wood',
  'i',
  'had'],
 ['upon',
  'this',
  'latter',
  'shelf',
  'and',
  'exactly',
  'beneath',
  'one',
  'of',
  'the',
  'rims',
  'of',
  'the',
  'keg',
  'a',
  'small',
  'earthern',
  'pitcher',
  'was',
  'deposited'],
 ['i',
  'now',
  'bored',
  'a',
  'hole',
  'in',
  'the',
  'end',
  'of',
  'the',
  'keg',
  'over',
  'the',
  'pitcher',
  'and',
  'fitted',
  'in',
  'a',
  'plug',
  'of',
  'soft',
  'wood',
  'cut',
  'in',
  'a',
  'tapering',
  'or',
  'conical',
  'shape'],
 ['this',
  'plug',
  'i',
  'pushed',
  'in',
  'or',
  'pulled',
  'out',
  'as',
  'might',
  'happen',
  'until',
  'after',
  'a',
  'few',
  'experiments',
  'it',
  'arrived',
  'at',
  'that',
  'exact',
  'degree',
  'of',
  'tightness',
  'at',
  'which',
  'the',
  'water',
  'oozing',
  'from',
  'the',
  'hole',
  'and',
  'falling',
  'into',
  'the',
  'pitcher',
  'below',
  'would',
  'fill',
  'the',
  'latter',
  'to',
  'the',
  'brim',
  'in',
  'the',
  'period',
  'of',
  'sixty',
  'minutes'],
 ['this',
  'of',
  'course',
  'was',
  'a',
  'matter',
  'briefly',
  'and',
  'easily',
  'ascertained',
  'by',
  'noticing',
  'the',
  'proportion',
  'of',
  'the',
  'pitcher',
  'filled',
  'in',
  'any',
  'given',
  'time'],
 ['having',
  'arranged',
  'all',
  'this',
  'the',
  'rest',
  'of',
  'the',
  'plan',
  'is',
  'obvious'],
 ['my',
  'bed',
  'was',
  'so',
  'contrived',
  'upon',
  'the',
  'floor',
  'of',
  'the',
  'car',
  'as',
  'to',
  'bring',
  'my',
  'head',
  'in',
  'lying',
  'down',
  'immediately',
  'below',
  'the',
  'mouth',
  'of',
  'the',
  'pitcher'],
 ['it',
  'was',
  'evident',
  'that',
  'at',
  'the',
  'expiration',
  'of',
  'an',
  'hour',
  'the',
  'pitcher',
  'getting',
  'full',
  'would',
  'be',
  'forced',
  'to',
  'run',
  'over',
  'and',
  'to',
  'run',
  'over',
  'at',
  'the',
  'mouth',
  'which',
  'was',
  'somewhat',
  'lower',
  'than',
  'the',
  'rim'],
 ['it',
  'was',
  'also',
  'evident',
  'that',
  'the',
  'water',
  'thus',
  'falling',
  'from',
  'a',
  'height',
  'of',
  'more',
  'than',
  'four',
  'feet',
  'could',
  'not',
  'do',
  'otherwise',
  'than',
  'fall',
  'upon',
  'my',
  'face',
  'and',
  'that',
  'the',
  'sure',
  'consequences',
  'would',
  'be',
  'to',
  'waken',
  'me',
  'up',
  'instantaneously',
  'even',
  'from',
  'the',
  'soundest',
  'slumber',
  'in',
  'the',
  'world'],
 ['it',
  'was',
  'fully',
  'eleven',
  'by',
  'the',
  'time',
  'i',
  'had',
  'completed',
  'these',
  'arrangements',
  'and',
  'i',
  'immediately',
  'betook',
  'myself',
  'to',
  'bed',
  'with',
  'full',
  'confidence',
  'in',
  'the',
  'efficiency',
  'of',
  'my',
  'invention'],
 ['nor', 'in', 'this', 'matter', 'was', 'i', 'disappointed'],
 ['punctually',
  'every',
  'sixty',
  'minutes',
  'was',
  'i',
  'aroused',
  'by',
  'my',
  'trusty',
  'chronometer',
  'when',
  'having',
  'emptied',
  'the',
  'pitcher',
  'into',
  'the',
  'bunghole',
  'of',
  'the',
  'keg',
  'and',
  'performed',
  'the',
  'duties',
  'of',
  'the',
  'condenser',
  'i',
  'retired',
  'again',
  'to',
  'bed'],
 ['these',
  'regular',
  'interruptions',
  'to',
  'my',
  'slumber',
  'caused',
  'me',
  'even',
  'less',
  'discomfort',
  'than',
  'i',
  'had',
  'anticipated',
  'and',
  'when',
  'i',
  'finally',
  'arose',
  'for',
  'the',
  'day',
  'it',
  'was',
  'seven',
  'o',
  'clock',
  'and',
  'the',
  'sun',
  'had',
  'attained',
  'many',
  'degrees',
  'above',
  'the',
  'line',
  'of',
  'my',
  'horizon'],
 ['3d'],
 ['i',
  'found',
  'the',
  'balloon',
  'at',
  'an',
  'immense',
  'height',
  'indeed',
  'and',
  'the',
  'earth',
  's',
  'apparent',
  'convexity',
  'increased',
  'in',
  'a',
  'material',
  'degree'],
 ['below',
  'me',
  'in',
  'the',
  'ocean',
  'lay',
  'a',
  'cluster',
  'of',
  'black',
  'specks',
  'which',
  'undoubtedly',
  'were',
  'islands'],
 ['far',
  'away',
  'to',
  'the',
  'northward',
  'i',
  'perceived',
  'a',
  'thin',
  'white',
  'and',
  'exceedingly',
  'brilliant',
  'line',
  'or',
  'streak',
  'on',
  'the',
  'edge',
  'of',
  'the',
  'horizon',
  'and',
  'i',
  'had',
  'no',
  'hesitation',
  'in',
  'supposing',
  'it',
  'to',
  'be',
  'the',
  'southern',
  'disk',
  'of',
  'the',
  'ices',
  'of',
  'the'],
 ['my',
  'curiosity',
  'was',
  'greatly',
  'excited',
  'for',
  'i',
  'had',
  'hopes',
  'of',
  'passing',
  'on',
  'much',
  'farther',
  'to',
  'the',
  'north',
  'and',
  'might',
  'possibly',
  'at',
  'some',
  'period',
  'find',
  'myself',
  'placed',
  'directly',
  'above',
  'the',
  'itself'],
 ['i',
  'now',
  'lamented',
  'that',
  'my',
  'great',
  'elevation',
  'would',
  'in',
  'this',
  'case',
  'prevent',
  'my',
  'taking',
  'as',
  'accurate',
  'a',
  'survey',
  'as',
  'i',
  'could',
  'wish'],
 ['much', 'however', 'might', 'be', 'ascertained'],
 ['nothing',
  'else',
  'of',
  'an',
  'extraordinary',
  'nature',
  'occurred',
  'during',
  'the',
  'day'],
 ['my',
  'apparatus',
  'all',
  'continued',
  'in',
  'good',
  'order',
  'and',
  'the',
  'balloon',
  'still',
  'ascended',
  'without',
  'any',
  'perceptible',
  'vacillation'],
 ['the',
  'cold',
  'was',
  'intense',
  'and',
  'obliged',
  'me',
  'to',
  'wrap',
  'up',
  'closely',
  'in',
  'an',
  'overcoat'],
 ['when',
  'darkness',
  'came',
  'over',
  'the',
  'earth',
  'i',
  'betook',
  'myself',
  'to',
  'bed',
  'although',
  'it',
  'was',
  'for',
  'many',
  'hours',
  'afterward',
  'broad',
  'daylight',
  'all',
  'around',
  'my',
  'immediate',
  'situation'],
 ['the',
  'waterclock',
  'was',
  'punctual',
  'in',
  'its',
  'duty',
  'and',
  'i',
  'slept',
  'until',
  'next',
  'morning',
  'soundly',
  'with',
  'the',
  'exception',
  'of',
  'the',
  'periodical',
  'interruption'],
 ['4th'],
 ['in',
  'good',
  'health',
  'and',
  'spirits',
  'and',
  'was',
  'astonished',
  'at',
  'the',
  'singular',
  'change',
  'which',
  'had',
  'taken',
  'place',
  'in',
  'the',
  'appearance',
  'of',
  'the',
  'sea'],
 ['it',
  'had',
  'lost',
  'in',
  'a',
  'great',
  'measure',
  'the',
  'deep',
  'tint',
  'of',
  'blue',
  'it',
  'had',
  'hitherto',
  'worn',
  'being',
  'now',
  'of',
  'a',
  'grayishwhite',
  'and',
  'of',
  'a',
  'lustre',
  'dazzling',
  'to',
  'the',
  'eye'],
 ['the',
  'islands',
  'were',
  'no',
  'longer',
  'visible',
  'whether',
  'they',
  'had',
  'passed',
  'down',
  'the',
  'horizon',
  'to',
  'the',
  'southeast',
  'or',
  'whether',
  'my',
  'increasing',
  'elevation',
  'had',
  'left',
  'them',
  'out',
  'of',
  'sight',
  'it',
  'is',
  'impossible',
  'to',
  'say'],
 ['i', 'was', 'inclined', 'however', 'to', 'the', 'latter', 'opinion'],
 ['the',
  'rim',
  'of',
  'ice',
  'to',
  'the',
  'northward',
  'was',
  'growing',
  'more',
  'and',
  'more',
  'apparent'],
 ['cold', 'by', 'no', 'means', 'so', 'intense'],
 ['nothing',
  'of',
  'importance',
  'occurred',
  'and',
  'i',
  'passed',
  'the',
  'day',
  'in',
  'reading',
  'having',
  'taken',
  'care',
  'to',
  'supply',
  'myself',
  'with',
  'books'],
 ['5th'],
 ['beheld',
  'the',
  'singular',
  'phenomenon',
  'of',
  'the',
  'sun',
  'rising',
  'while',
  'nearly',
  'the',
  'whole',
  'visible',
  'surface',
  'of',
  'the',
  'earth',
  'continued',
  'to',
  'be',
  'involved',
  'in',
  'darkness'],
 ['in',
  'time',
  'however',
  'the',
  'light',
  'spread',
  'itself',
  'over',
  'all',
  'and',
  'i',
  'again',
  'saw',
  'the',
  'line',
  'of',
  'ice',
  'to',
  'the',
  'northward'],
 ['it',
  'was',
  'now',
  'very',
  'distinct',
  'and',
  'appeared',
  'of',
  'a',
  'much',
  'darker',
  'hue',
  'than',
  'the',
  'waters',
  'of',
  'the',
  'ocean'],
 ['i',
  'was',
  'evidently',
  'approaching',
  'it',
  'and',
  'with',
  'great',
  'rapidity'],
 ['i',
  'could',
  'again',
  'distinguish',
  'a',
  'strip',
  'of',
  'land',
  'to',
  'the',
  'eastward',
  'and',
  'one',
  'also',
  'to',
  'the',
  'westward',
  'but',
  'could',
  'not',
  'be',
  'certain'],
 ['weather', 'moderate'],
 ['nothing', 'of', 'any', 'consequence', 'happened', 'during', 'the', 'day'],
 ['went', 'early', 'to', 'bed'],
 ['6th'],
 ['surprised',
  'at',
  'finding',
  'the',
  'rim',
  'of',
  'ice',
  'at',
  'a',
  'very',
  'moderate',
  'distance',
  'and',
  'an',
  'immense',
  'field',
  'of',
  'the',
  'same',
  'material',
  'stretching',
  'away',
  'off',
  'to',
  'the',
  'horizon',
  'in',
  'the',
  'north'],
 ['it',
  'was',
  'evident',
  'that',
  'if',
  'the',
  'balloon',
  'held',
  'its',
  'present',
  'course',
  'it',
  'would',
  'soon',
  'arrive',
  'above',
  'the',
  'and',
  'i',
  'had',
  'now',
  'little',
  'doubt',
  'of',
  'ultimately',
  'seeing',
  'the'],
 ['during',
  'the',
  'whole',
  'of',
  'the',
  'day',
  'i',
  'continued',
  'to',
  'near',
  'the',
  'ice'],
 ['night',
  'the',
  'limits',
  'of',
  'my',
  'horizon',
  'very',
  'suddenly',
  'and',
  'materially',
  'increased',
  'owing',
  'undoubtedly',
  'to',
  'the',
  'earth',
  's',
  'form',
  'being',
  'that',
  'of',
  'an',
  'oblate',
  'spheroid',
  'and',
  'my',
  'arriving',
  'above',
  'the',
  'flattened',
  'regions',
  'in',
  'the',
  'vicinity',
  'of',
  'the',
  'circle'],
 ['when',
  'darkness',
  'at',
  'length',
  'overtook',
  'me',
  'i',
  'went',
  'to',
  'bed',
  'in',
  'great',
  'anxiety',
  'fearing',
  'to',
  'pass',
  'over',
  'the',
  'object',
  'of',
  'so',
  'much',
  'curiosity',
  'when',
  'i',
  'should',
  'have',
  'no',
  'opportunity',
  'of',
  'observing',
  'it'],
 ['7th'],
 ['early',
  'and',
  'to',
  'my',
  'great',
  'joy',
  'at',
  'length',
  'beheld',
  'what',
  'there',
  'could',
  'be',
  'no',
  'hesitation',
  'in',
  'supposing',
  'the',
  'northern',
  'itself'],
 ['it',
  'was',
  'there',
  'beyond',
  'a',
  'doubt',
  'and',
  'immediately',
  'beneath',
  'my',
  'feet',
  'but',
  'alas'],
 ['i',
  'had',
  'now',
  'ascended',
  'to',
  'so',
  'vast',
  'a',
  'distance',
  'that',
  'nothing',
  'could',
  'with',
  'accuracy',
  'be',
  'discerned'],
 ['indeed',
  'to',
  'judge',
  'from',
  'the',
  'progression',
  'of',
  'the',
  'numbers',
  'indicating',
  'my',
  'various',
  'altitudes',
  'respectively',
  'at',
  'different',
  'periods',
  'between',
  'six',
  'on',
  'the',
  'second',
  'of',
  'and',
  'twenty',
  'minutes',
  'before',
  'nine',
  'of',
  'the',
  'same',
  'day',
  'at',
  'which',
  'time',
  'the',
  'barometer',
  'ran',
  'down',
  'it',
  'might',
  'be',
  'fairly',
  'inferred',
  'that',
  'the',
  'balloon',
  'had',
  'now',
  'at',
  'four',
  'o',
  'clock',
  'in',
  'the',
  'morning',
  'of',
  'the',
  'seventh',
  'reached',
  'a',
  'height',
  'of',
  'not',
  'less',
  'certainly',
  'than',
  '7254',
  'miles',
  'above',
  'the',
  'surface',
  'of',
  'the',
  'sea'],
 ['this',
  'elevation',
  'may',
  'appear',
  'immense',
  'but',
  'the',
  'estimate',
  'upon',
  'which',
  'it',
  'is',
  'calculated',
  'gave',
  'a',
  'result',
  'in',
  'all',
  'probability',
  'far',
  'inferior',
  'to',
  'the',
  'truth'],
 ['at',
  'all',
  'events',
  'i',
  'undoubtedly',
  'beheld',
  'the',
  'whole',
  'of',
  'the',
  'earth',
  's',
  'major',
  'diameter',
  'the',
  'entire',
  'northern',
  'hemisphere',
  'lay',
  'beneath',
  'me',
  'like',
  'a',
  'chart',
  'orthographically',
  'projected',
  'and',
  'the',
  'great',
  'circle',
  'of',
  'the',
  'equator',
  'itself',
  'formed',
  'the',
  'boundary',
  'line',
  'of',
  'my',
  'horizon'],
 ['your',
  'excellencies',
  'may',
  'however',
  'readily',
  'imagine',
  'that',
  'the',
  'confined',
  'regions',
  'hitherto',
  'unexplored',
  'within',
  'the',
  'limits',
  'of',
  'the',
  'circle',
  'although',
  'situated',
  'directly',
  'beneath',
  'me',
  'and',
  'therefore',
  'seen',
  'without',
  'any',
  'appearance',
  'of',
  'being',
  'foreshortened',
  'were',
  'still',
  'in',
  'themselves',
  'comparatively',
  'too',
  'diminutive',
  'and',
  'at',
  'too',
  'great',
  'a',
  'distance',
  'from',
  'the',
  'point',
  'of',
  'sight',
  'to',
  'admit',
  'of',
  'any',
  'very',
  'accurate',
  'examination'],
 ['nevertheless',
  'what',
  'could',
  'be',
  'seen',
  'was',
  'of',
  'a',
  'nature',
  'singular',
  'and',
  'exciting'],
 ['northwardly',
  'from',
  'that',
  'huge',
  'rim',
  'before',
  'mentioned',
  'and',
  'which',
  'with',
  'slight',
  'qualification',
  'may',
  'be',
  'called',
  'the',
  'limit',
  'of',
  'human',
  'discovery',
  'in',
  'these',
  'regions',
  'one',
  'unbroken',
  'or',
  'nearly',
  'unbroken',
  'sheet',
  'of',
  'ice',
  'continues',
  'to',
  'extend'],
 ['in',
  'the',
  'first',
  'few',
  'degrees',
  'of',
  'this',
  'its',
  'progress',
  'its',
  'surface',
  'is',
  'very',
  'sensibly',
  'flattened',
  'farther',
  'on',
  'depressed',
  'into',
  'a',
  'plane',
  'and',
  'finally',
  'becoming',
  'not',
  'a',
  'little',
  'concave',
  'it',
  'terminates',
  'at',
  'the',
  'itself',
  'in',
  'a',
  'circular',
  'centre',
  'sharply',
  'defined',
  'whose',
  'apparent',
  'diameter',
  'subtended',
  'at',
  'the',
  'balloon',
  'an',
  'angle',
  'of',
  'about',
  'sixtyfive',
  'seconds',
  'and',
  'whose',
  'dusky',
  'hue',
  'varying',
  'in',
  'intensity',
  'was',
  'at',
  'all',
  'times',
  'darker',
  'than',
  'any',
  'other',
  'spot',
  'upon',
  'the',
  'visible',
  'hemisphere',
  'and',
  'occasionally',
  'deepened',
  'into',
  'the',
  'most',
  'absolute',
  'and',
  'impenetrable',
  'blackness'],
 ['farther', 'than', 'this', 'little', 'could', 'be', 'ascertained'],
 ['by',
  'twelve',
  'o',
  'clock',
  'the',
  'circular',
  'centre',
  'had',
  'materially',
  'decreased',
  'in',
  'circumference',
  'and',
  'by',
  'seven'],
 ['i',
  'lost',
  'sight',
  'of',
  'it',
  'entirely',
  'the',
  'balloon',
  'passing',
  'over',
  'the',
  'western',
  'limb',
  'of',
  'the',
  'ice',
  'and',
  'floating',
  'away',
  'rapidly',
  'in',
  'the',
  'direction',
  'of',
  'the',
  'equator'],
 ['8th'],
 ['found',
  'a',
  'sensible',
  'diminution',
  'in',
  'the',
  'earth',
  's',
  'apparent',
  'diameter',
  'besides',
  'a',
  'material',
  'alteration',
  'in',
  'its',
  'general',
  'color',
  'and',
  'appearance'],
 ['the',
  'whole',
  'visible',
  'area',
  'partook',
  'in',
  'different',
  'degrees',
  'of',
  'a',
  'tint',
  'of',
  'pale',
  'yellow',
  'and',
  'in',
  'some',
  'portions',
  'had',
  'acquired',
  'a',
  'brilliancy',
  'even',
  'painful',
  'to',
  'the',
  'eye'],
 ['my',
  'view',
  'downward',
  'was',
  'also',
  'considerably',
  'impeded',
  'by',
  'the',
  'dense',
  'atmosphere',
  'in',
  'the',
  'vicinity',
  'of',
  'the',
  'surface',
  'being',
  'loaded',
  'with',
  'clouds',
  'between',
  'whose',
  'masses',
  'i',
  'could',
  'only',
  'now',
  'and',
  'then',
  'obtain',
  'a',
  'glimpse',
  'of',
  'the',
  'earth',
  'itself'],
 ['this',
  'difficulty',
  'of',
  'direct',
  'vision',
  'had',
  'troubled',
  'me',
  'more',
  'or',
  'less',
  'for',
  'the',
  'last',
  'fortyeight',
  'hours',
  'but',
  'my',
  'present',
  'enormous',
  'elevation',
  'brought',
  'closer',
  'together',
  'as',
  'it',
  'were',
  'the',
  'floating',
  'bodies',
  'of',
  'vapor',
  'and',
  'the',
  'inconvenience',
  'became',
  'of',
  'course',
  'more',
  'and',
  'more',
  'palpable',
  'in',
  'proportion',
  'to',
  'my',
  'ascent'],
 ['nevertheless',
  'i',
  'could',
  'easily',
  'perceive',
  'that',
  'the',
  'balloon',
  'now',
  'hovered',
  'above',
  'the',
  'range',
  'of',
  'great',
  'lakes',
  'in',
  'the',
  'continent',
  'of',
  'and',
  'was',
  'holding',
  'a',
  'course',
  'due',
  'south',
  'which',
  'would',
  'bring',
  'me',
  'to',
  'the',
  'tropics'],
 ['this',
  'circumstance',
  'did',
  'not',
  'fail',
  'to',
  'give',
  'me',
  'the',
  'most',
  'heartful',
  'satisfaction',
  'and',
  'i',
  'hailed',
  'it',
  'as',
  'a',
  'happy',
  'omen',
  'of',
  'ultimate',
  'success'],
 ['indeed',
  'the',
  'direction',
  'i',
  'had',
  'hitherto',
  'taken',
  'had',
  'filled',
  'me',
  'with',
  'uneasiness',
  'for',
  'it',
  'was',
  'evident',
  'that',
  'had',
  'i',
  'continued',
  'it',
  'much',
  'longer',
  'there',
  'would',
  'have',
  'been',
  'no',
  'possibility',
  'of',
  'my',
  'arriving',
  'at',
  'the',
  'moon',
  'at',
  'all',
  'whose',
  'orbit',
  'is',
  'inclined',
  'to',
  'the',
  'ecliptic',
  'at',
  'only',
  'the',
  'small',
  'angle',
  'of',
  '5',
  'degrees',
  '8',
  '48'],
 ['9th'],
 ['today',
  'the',
  'earth',
  's',
  'diameter',
  'was',
  'greatly',
  'diminished',
  'and',
  'the',
  'color',
  'of',
  'the',
  'surface',
  'assumed',
  'hourly',
  'a',
  'deeper',
  'tint',
  'of',
  'yellow'],
 ['the',
  'balloon',
  'kept',
  'steadily',
  'on',
  'her',
  'course',
  'to',
  'the',
  'southward',
  'and',
  'arrived',
  'at',
  'nine',
  'over',
  'the',
  'northern',
  'edge',
  'of',
  'the'],
 ['10th'],
 ['i',
  'was',
  'suddenly',
  'aroused',
  'from',
  'slumber',
  'about',
  'five',
  'o',
  'clock',
  'this',
  'morning',
  'by',
  'a',
  'loud',
  'crackling',
  'and',
  'terrific',
  'sound',
  'for',
  'which',
  'i',
  'could',
  'in',
  'no',
  'manner',
  'account'],
 ['it',
  'was',
  'of',
  'very',
  'brief',
  'duration',
  'but',
  'while',
  'it',
  'lasted',
  'resembled',
  'nothing',
  'in',
  'the',
  'world',
  'of',
  'which',
  'i',
  'had',
  'any',
  'previous',
  'experience'],
 ['it',
  'is',
  'needless',
  'to',
  'say',
  'that',
  'i',
  'became',
  'excessively',
  'alarmed',
  'having',
  'in',
  'the',
  'first',
  'instance',
  'attributed',
  'the',
  'noise',
  'to',
  'the',
  'bursting',
  'of',
  'the',
  'balloon'],
 ['i',
  'examined',
  'all',
  'my',
  'apparatus',
  'however',
  'with',
  'great',
  'attention',
  'and',
  'could',
  'discover',
  'nothing',
  'out',
  'of',
  'order'],
 ['a',
  'great',
  'part',
  'of',
  'the',
  'day',
  'in',
  'meditating',
  'upon',
  'an',
  'occurrence',
  'so',
  'extraordinary',
  'but',
  'could',
  'find',
  'no',
  'means',
  'whatever',
  'of',
  'accounting',
  'for',
  'it'],
 ['went',
  'to',
  'bed',
  'dissatisfied',
  'and',
  'in',
  'a',
  'state',
  'of',
  'great',
  'anxiety',
  'and',
  'agitation'],
 ['11th'],
 ['found',
  'a',
  'startling',
  'diminution',
  'in',
  'the',
  'apparent',
  'diameter',
  'of',
  'the',
  'earth',
  'and',
  'a',
  'considerable',
  'increase',
  'now',
  'observable',
  'for',
  'the',
  'first',
  'time',
  'in',
  'that',
  'of',
  'the',
  'moon',
  'itself',
  'which',
  'wanted',
  'only',
  'a',
  'few',
  'days',
  'of',
  'being',
  'full'],
 ['it',
  'now',
  'required',
  'long',
  'and',
  'excessive',
  'labor',
  'to',
  'condense',
  'within',
  'the',
  'chamber',
  'sufficient',
  'atmospheric',
  'air',
  'for',
  'the',
  'sustenance',
  'of',
  'life'],
 ['12th'],
 ['a',
  'singular',
  'alteration',
  'took',
  'place',
  'in',
  'regard',
  'to',
  'the',
  'direction',
  'of',
  'the',
  'balloon',
  'and',
  'although',
  'fully',
  'anticipated',
  'afforded',
  'me',
  'the',
  'most',
  'unequivocal',
  'delight'],
 ['having',
  'reached',
  'in',
  'its',
  'former',
  'course',
  'about',
  'the',
  'twentieth',
  'parallel',
  'of',
  'southern',
  'latitude',
  'it',
  'turned',
  'off',
  'suddenly',
  'at',
  'an',
  'acute',
  'angle',
  'to',
  'the',
  'eastward',
  'and',
  'thus',
  'proceeded',
  'throughout',
  'the',
  'day',
  'keeping',
  'nearly',
  'if',
  'not',
  'altogether',
  'in',
  'the',
  'exact',
  'plane',
  'of',
  'the',
  'lunar',
  'elipse'],
 ['what',
  'was',
  'worthy',
  'of',
  'remark',
  'a',
  'very',
  'perceptible',
  'vacillation',
  'in',
  'the',
  'car',
  'was',
  'a',
  'consequence',
  'of',
  'this',
  'change',
  'of',
  'route',
  'a',
  'vacillation',
  'which',
  'prevailed',
  'in',
  'a',
  'more',
  'or',
  'less',
  'degree',
  'for',
  'a',
  'period',
  'of',
  'many',
  'hours'],
 ['13th'],
 ['again',
  'very',
  'much',
  'alarmed',
  'by',
  'a',
  'repetition',
  'of',
  'the',
  'loud',
  'crackling',
  'noise',
  'which',
  'terrified',
  'me',
  'on',
  'the',
  'tenth'],
 ['thought',
  'long',
  'upon',
  'the',
  'subject',
  'but',
  'was',
  'unable',
  'to',
  'form',
  'any',
  'satisfactory',
  'conclusion'],
 ['great',
  'decrease',
  'in',
  'the',
  'earth',
  's',
  'apparent',
  'diameter',
  'which',
  'now',
  'subtended',
  'from',
  'the',
  'balloon',
  'an',
  'angle',
  'of',
  'very',
  'little',
  'more',
  'than',
  'twentyfive',
  'degrees'],
 ['the',
  'moon',
  'could',
  'not',
  'be',
  'seen',
  'at',
  'all',
  'being',
  'nearly',
  'in',
  'my',
  'zenith'],
 ['i',
  'still',
  'continued',
  'in',
  'the',
  'plane',
  'of',
  'the',
  'elipse',
  'but',
  'made',
  'little',
  'progress',
  'to',
  'the',
  'eastward'],
 ['14th'],
 ['extremely',
  'rapid',
  'decrease',
  'in',
  'the',
  'diameter',
  'of',
  'the',
  'earth'],
 ['today',
  'i',
  'became',
  'strongly',
  'impressed',
  'with',
  'the',
  'idea',
  'that',
  'the',
  'balloon',
  'was',
  'now',
  'actually',
  'running',
  'up',
  'the',
  'line',
  'of',
  'apsides',
  'to',
  'the',
  'point',
  'of',
  'perigee',
  'in',
  'other',
  'words',
  'holding',
  'the',
  'direct',
  'course',
  'which',
  'would',
  'bring',
  'it',
  'immediately',
  'to',
  'the',
  'moon',
  'in',
  'that',
  'part',
  'of',
  'its',
  'orbit',
  'the',
  'nearest',
  'to',
  'the',
  'earth'],
 ['the',
  'moon',
  'itself',
  'was',
  'directly',
  'overhead',
  'and',
  'consequently',
  'hidden',
  'from',
  'my',
  'view'],
 ['and',
  'longcontinued',
  'labor',
  'necessary',
  'for',
  'the',
  'condensation',
  'of',
  'the',
  'atmosphere'],
 ['15th'],
 ['not',
  'even',
  'the',
  'outlines',
  'of',
  'continents',
  'and',
  'seas',
  'could',
  'now',
  'be',
  'traced',
  'upon',
  'the',
  'earth',
  'with',
  'anything',
  'approaching',
  'distinctness'],
 ['about',
  'twelve',
  'o',
  'clock',
  'i',
  'became',
  'aware',
  'for',
  'the',
  'third',
  'time',
  'of',
  'that',
  'appalling',
  'sound',
  'which',
  'had',
  'so',
  'astonished',
  'me',
  'before'],
 ['it',
  'now',
  'however',
  'continued',
  'for',
  'some',
  'moments',
  'and',
  'gathered',
  'intensity',
  'as',
  'it',
  'continued'],
 ['at',
  'length',
  'while',
  'stupefied',
  'and',
  'terrorstricken',
  'i',
  'stood',
  'in',
  'expectation',
  'of',
  'i',
  'knew',
  'not',
  'what',
  'hideous',
  'destruction',
  'the',
  'car',
  'vibrated',
  'with',
  'excessive',
  'violence',
  'and',
  'a',
  'gigantic',
  'and',
  'flaming',
  'mass',
  'of',
  'some',
  'material',
  'which',
  'i',
  'could',
  'not',
  'distinguish',
  'came',
  'with',
  'a',
  'voice',
  'of',
  'a',
  'thousand',
  'thunders',
  'roaring',
  'and',
  'booming',
  'by',
  'the',
  'balloon'],
 ['when',
  'my',
  'fears',
  'and',
  'astonishment',
  'had',
  'in',
  'some',
  'degree',
  'subsided',
  'i',
  'had',
  'little',
  'difficulty',
  'in',
  'supposing',
  'it',
  'to',
  'be',
  'some',
  'mighty',
  'volcanic',
  'fragment',
  'ejected',
  'from',
  'that',
  'world',
  'to',
  'which',
  'i',
  'was',
  'so',
  'rapidly',
  'approaching',
  'and',
  'in',
  'all',
  'probability',
  'one',
  'of',
  'that',
  'singular',
  'class',
  'of',
  'substances',
  'occasionally',
  'picked',
  'up',
  'on',
  'the',
  'earth',
  'and',
  'termed',
  'meteoric',
  'stones',
  'for',
  'want',
  'of',
  'a',
  'better',
  'appellation'],
 ['16th'],
 ['today',
  'looking',
  'upward',
  'as',
  'well',
  'as',
  'i',
  'could',
  'through',
  'each',
  'of',
  'the',
  'side',
  'windows',
  'alternately',
  'i',
  'beheld',
  'to',
  'my',
  'great',
  'delight',
  'a',
  'very',
  'small',
  'portion',
  'of',
  'the',
  'moon',
  's',
  'disk',
  'protruding',
  'as',
  'it',
  'were',
  'on',
  'all',
  'sides',
  'beyond',
  'the',
  'huge',
  'circumference',
  'of',
  'the',
  'balloon'],
 ['my',
  'agitation',
  'was',
  'extreme',
  'for',
  'i',
  'had',
  'now',
  'little',
  'doubt',
  'of',
  'soon',
  'reaching',
  'the',
  'end',
  'of',
  'my',
  'perilous',
  'voyage'],
 ['indeed',
  'the',
  'labor',
  'now',
  'required',
  'by',
  'the',
  'condenser',
  'had',
  'increased',
  'to',
  'a',
  'most',
  'oppressive',
  'degree',
  'and',
  'allowed',
  'me',
  'scarcely',
  'any',
  'respite',
  'from',
  'exertion'],
 ['was', 'a', 'matter', 'nearly', 'out', 'of', 'the', 'question'],
 ['i',
  'became',
  'quite',
  'ill',
  'and',
  'my',
  'frame',
  'trembled',
  'with',
  'exhaustion'],
 ['it',
  'was',
  'impossible',
  'that',
  'human',
  'nature',
  'could',
  'endure',
  'this',
  'state',
  'of',
  'intense',
  'suffering',
  'much',
  'longer'],
 ['during',
  'the',
  'now',
  'brief',
  'interval',
  'of',
  'darkness',
  'a',
  'meteoric',
  'stone',
  'again',
  'passed',
  'in',
  'my',
  'vicinity',
  'and',
  'the',
  'frequency',
  'of',
  'these',
  'phenomena',
  'began',
  'to',
  'occasion',
  'me',
  'much',
  'apprehension'],
 ['17th'],
 ['this', 'morning', 'proved', 'an', 'epoch', 'in', 'my', 'voyage'],
 ['it',
  'will',
  'be',
  'remembered',
  'that',
  'on',
  'the',
  'thirteenth',
  'the',
  'earth',
  'subtended',
  'an',
  'angular',
  'breadth',
  'of',
  'twentyfive',
  'degrees'],
 ['on',
  'the',
  'fourteenth',
  'this',
  'had',
  'greatly',
  'diminished',
  'on',
  'the',
  'fifteenth',
  'a',
  'still',
  'more',
  'remarkable',
  'decrease',
  'was',
  'observable',
  'and',
  'on',
  'retiring',
  'on',
  'the',
  'night',
  'of',
  'the',
  'sixteenth',
  'i',
  'had',
  'noticed',
  'an',
  'angle',
  'of',
  'no',
  'more',
  'than',
  'about',
  'seven',
  'degrees',
  'and',
  'fifteen',
  'minutes'],
 ['what',
  'therefore',
  'must',
  'have',
  'been',
  'my',
  'amazement',
  'on',
  'awakening',
  'from',
  'a',
  'brief',
  'and',
  'disturbed',
  'slumber',
  'on',
  'the',
  'morning',
  'of',
  'this',
  'day',
  'the',
  'seventeenth',
  'at',
  'finding',
  'the',
  'surface',
  'beneath',
  'me',
  'so',
  'suddenly',
  'and',
  'wonderfully',
  'augmented',
  'in',
  'volume',
  'as',
  'to',
  'subtend',
  'no',
  'less',
  'than',
  'thirtynine',
  'degrees',
  'in',
  'apparent',
  'angular',
  'diameter'],
 ['i', 'was', 'thunderstruck'],
 ['no',
  'words',
  'can',
  'give',
  'any',
  'adequate',
  'idea',
  'of',
  'the',
  'extreme',
  'the',
  'absolute',
  'horror',
  'and',
  'astonishment',
  'with',
  'which',
  'i',
  'was',
  'seized',
  'possessed',
  'and',
  'altogether',
  'overwhelmed'],
 ['my',
  'knees',
  'tottered',
  'beneath',
  'me',
  'my',
  'teeth',
  'chattered',
  'my',
  'hair',
  'started',
  'up',
  'on',
  'end'],
 ['the',
  'balloon',
  'then',
  'had',
  'actually',
  'burst',
  'these',
  'were',
  'the',
  'first',
  'tumultuous',
  'ideas',
  'that',
  'hurried',
  'through',
  'my',
  'mind',
  'the',
  'balloon',
  'had',
  'positively',
  'burst',
  'i',
  'was',
  'falling',
  'falling',
  'with',
  'the',
  'most',
  'impetuous',
  'the',
  'most',
  'unparalleled',
  'velocity'],
 ['to',
  'judge',
  'by',
  'the',
  'immense',
  'distance',
  'already',
  'so',
  'quickly',
  'passed',
  'over',
  'it',
  'could',
  'not',
  'be',
  'more',
  'than',
  'ten',
  'minutes',
  'at',
  'the',
  'farthest',
  'before',
  'i',
  'should',
  'meet',
  'the',
  'surface',
  'of',
  'the',
  'earth',
  'and',
  'be',
  'hurled',
  'into',
  'annihilation',
  'but',
  'at',
  'length',
  'reflection',
  'came',
  'to',
  'my',
  'relief'],
 ['i', 'paused', 'i', 'considered', 'and', 'i', 'began', 'to', 'doubt'],
 ['the', 'matter', 'was', 'impossible'],
 ['i',
  'could',
  'not',
  'in',
  'any',
  'reason',
  'have',
  'so',
  'rapidly',
  'come',
  'down'],
 ['besides',
  'although',
  'i',
  'was',
  'evidently',
  'approaching',
  'the',
  'surface',
  'below',
  'me',
  'it',
  'was',
  'with',
  'a',
  'speed',
  'by',
  'no',
  'means',
  'commensurate',
  'with',
  'the',
  'velocity',
  'i',
  'had',
  'at',
  'first',
  'so',
  'horribly',
  'conceived'],
 ['this',
  'consideration',
  'served',
  'to',
  'calm',
  'the',
  'perturbation',
  'of',
  'my',
  'mind',
  'and',
  'i',
  'finally',
  'succeeded',
  'in',
  'regarding',
  'the',
  'phenomenon',
  'in',
  'its',
  'proper',
  'point',
  'of',
  'view'],
 ['in',
  'fact',
  'amazement',
  'must',
  'have',
  'fairly',
  'deprived',
  'me',
  'of',
  'my',
  'senses',
  'when',
  'i',
  'could',
  'not',
  'see',
  'the',
  'vast',
  'difference',
  'in',
  'appearance',
  'between',
  'the',
  'surface',
  'below',
  'me',
  'and',
  'the',
  'surface',
  'of',
  'my',
  'mother',
  'earth'],
 ['the',
  'latter',
  'was',
  'indeed',
  'over',
  'my',
  'head',
  'and',
  'completely',
  'hidden',
  'by',
  'the',
  'balloon',
  'while',
  'the',
  'moon',
  'the',
  'moon',
  'itself',
  'in',
  'all',
  'its',
  'glory',
  'lay',
  'beneath',
  'me',
  'and',
  'at',
  'my',
  'feet'],
 ['the',
  'stupor',
  'and',
  'surprise',
  'produced',
  'in',
  'my',
  'mind',
  'by',
  'this',
  'extraordinary',
  'change',
  'in',
  'the',
  'posture',
  'of',
  'affairs',
  'was',
  'perhaps',
  'after',
  'all',
  'that',
  'part',
  'of',
  'the',
  'adventure',
  'least',
  'susceptible',
  'of',
  'explanation'],
 ['for',
  'the',
  'bouleversement',
  'in',
  'itself',
  'was',
  'not',
  'only',
  'natural',
  'and',
  'inevitable',
  'but',
  'had',
  'been',
  'long',
  'actually',
  'anticipated',
  'as',
  'a',
  'circumstance',
  'to',
  'be',
  'expected',
  'whenever',
  'i',
  'should',
  'arrive',
  'at',
  'that',
  'exact',
  'point',
  'of',
  'my',
  'voyage',
  'where',
  'the',
  'attraction',
  'of',
  'the',
  'planet',
  'should',
  'be',
  'superseded',
  'by',
  'the',
  'attraction',
  'of',
  'the',
  'satellite',
  'or',
  'more',
  'precisely',
  'where',
  'the',
  'gravitation',
  'of',
  'the',
  'balloon',
  'toward',
  'the',
  'earth',
  'should',
  'be',
  'less',
  'powerful',
  'than',
  'its',
  'gravitation',
  'toward',
  'the',
  'moon'],
 ['to',
  'be',
  'sure',
  'i',
  'arose',
  'from',
  'a',
  'sound',
  'slumber',
  'with',
  'all',
  'my',
  'senses',
  'in',
  'confusion',
  'to',
  'the',
  'contemplation',
  'of',
  'a',
  'very',
  'startling',
  'phenomenon',
  'and',
  'one',
  'which',
  'although',
  'expected',
  'was',
  'not',
  'expected',
  'at',
  'the',
  'moment'],
 ['the',
  'revolution',
  'itself',
  'must',
  'of',
  'course',
  'have',
  'taken',
  'place',
  'in',
  'an',
  'easy',
  'and',
  'gradual',
  'manner',
  'and',
  'it',
  'is',
  'by',
  'no',
  'means',
  'clear',
  'that',
  'had',
  'i',
  'even',
  'been',
  'awake',
  'at',
  'the',
  'time',
  'of',
  'the',
  'occurrence',
  'i',
  'should',
  'have',
  'been',
  'made',
  'aware',
  'of',
  'it',
  'by',
  'any',
  'internal',
  'evidence',
  'of',
  'an',
  'inversion',
  'that',
  'is',
  'to',
  'say',
  'by',
  'any',
  'inconvenience',
  'or',
  'disarrangement',
  'either',
  'about',
  'my',
  'person',
  'or',
  'about',
  'my',
  'apparatus'],
 ['it',
  'is',
  'almost',
  'needless',
  'to',
  'say',
  'that',
  'upon',
  'coming',
  'to',
  'a',
  'due',
  'sense',
  'of',
  'my',
  'situation',
  'and',
  'emerging',
  'from',
  'the',
  'terror',
  'which',
  'had',
  'absorbed',
  'every',
  'faculty',
  'of',
  'my',
  'soul',
  'my',
  'attention',
  'was',
  'in',
  'the',
  'first',
  'place',
  'wholly',
  'directed',
  'to',
  'the',
  'contemplation',
  'of',
  'the',
  'general',
  'physical',
  'appearance',
  'of',
  'the',
  'moon'],
 ['it',
  'lay',
  'beneath',
  'me',
  'like',
  'a',
  'chart',
  'and',
  'although',
  'i',
  'judged',
  'it',
  'to',
  'be',
  'still',
  'at',
  'no',
  'inconsiderable',
  'distance',
  'the',
  'indentures',
  'of',
  'its',
  'surface',
  'were',
  'defined',
  'to',
  'my',
  'vision',
  'with',
  'a',
  'most',
  'striking',
  'and',
  'altogether',
  'unaccountable',
  'distinctness'],
 ['the',
  'entire',
  'absence',
  'of',
  'ocean',
  'or',
  'sea',
  'and',
  'indeed',
  'of',
  'any',
  'lake',
  'or',
  'river',
  'or',
  'body',
  'of',
  'water',
  'whatsoever',
  'struck',
  'me',
  'at',
  'first',
  'glance',
  'as',
  'the',
  'most',
  'extraordinary',
  'feature',
  'in',
  'its',
  'geological',
  'condition'],
 ['yet',
  'strange',
  'to',
  'say',
  'i',
  'beheld',
  'vast',
  'level',
  'regions',
  'of',
  'a',
  'character',
  'decidedly',
  'alluvial',
  'although',
  'by',
  'far',
  'the',
  'greater',
  'portion',
  'of',
  'the',
  'hemisphere',
  'in',
  'sight',
  'was',
  'covered',
  'with',
  'innumerable',
  'volcanic',
  'mountains',
  'conical',
  'in',
  'shape',
  'and',
  'having',
  'more',
  'the',
  'appearance',
  'of',
  'artificial',
  'than',
  'of',
  'natural',
  'protuberance'],
 ['the',
  'highest',
  'among',
  'them',
  'does',
  'not',
  'exceed',
  'three',
  'and',
  'threequarter',
  'miles',
  'in',
  'perpendicular',
  'elevation',
  'but',
  'a',
  'map',
  'of',
  'the',
  'volcanic',
  'districts',
  'of',
  'the',
  'would',
  'afford',
  'to',
  'your',
  'excellencies',
  'a',
  'better',
  'idea',
  'of',
  'their',
  'general',
  'surface',
  'than',
  'any',
  'unworthy',
  'description',
  'i',
  'might',
  'think',
  'proper',
  'to',
  'attempt'],
 ['the',
  'greater',
  'part',
  'of',
  'them',
  'were',
  'in',
  'a',
  'state',
  'of',
  'evident',
  'eruption',
  'and',
  'gave',
  'me',
  'fearfully',
  'to',
  'understand',
  'their',
  'fury',
  'and',
  'their',
  'power',
  'by',
  'the',
  'repeated',
  'thunders',
  'of',
  'the',
  'miscalled',
  'meteoric',
  'stones',
  'which',
  'now',
  'rushed',
  'upward',
  'by',
  'the',
  'balloon',
  'with',
  'a',
  'frequency',
  'more',
  'and',
  'more',
  'appalling'],
 ['18th'],
 ['today',
  'i',
  'found',
  'an',
  'enormous',
  'increase',
  'in',
  'the',
  'moon',
  's',
  'apparent',
  'bulk',
  'and',
  'the',
  'evidently',
  'accelerated',
  'velocity',
  'of',
  'my',
  'descent',
  'began',
  'to',
  'fill',
  'me',
  'with',
  'alarm'],
 ['it',
  'will',
  'be',
  'remembered',
  'that',
  'in',
  'the',
  'earliest',
  'stage',
  'of',
  'my',
  'speculations',
  'upon',
  'the',
  'possibility',
  'of',
  'a',
  'passage',
  'to',
  'the',
  'moon',
  'the',
  'existence',
  'in',
  'its',
  'vicinity',
  'of',
  'an',
  'atmosphere',
  'dense',
  'in',
  'proportion',
  'to',
  'the',
  'bulk',
  'of',
  'the',
  'planet',
  'had',
  'entered',
  'largely',
  'into',
  'my',
  'calculations',
  'this',
  'too',
  'in',
  'spite',
  'of',
  'many',
  'theories',
  'to',
  'the',
  'contrary',
  'and',
  'it',
  'may',
  'be',
  'added',
  'in',
  'spite',
  'of',
  'a',
  'general',
  'disbelief',
  'in',
  'the',
  'existence',
  'of',
  'any',
  'lunar',
  'atmosphere',
  'at',
  'all'],
 ['but',
  'in',
  'addition',
  'to',
  'what',
  'i',
  'have',
  'already',
  'urged',
  'in',
  'regard',
  'to',
  's',
  'comet',
  'and',
  'the',
  'zodiacal',
  'light',
  'i',
  'had',
  'been',
  'strengthened',
  'in',
  'my',
  'opinion',
  'by',
  'certain',
  'observations',
  'of',
  'of'],
 ['he',
  'observed',
  'the',
  'moon',
  'when',
  'two',
  'days',
  'and',
  'a',
  'half',
  'old',
  'in',
  'the',
  'evening',
  'soon',
  'after',
  'sunset',
  'before',
  'the',
  'dark',
  'part',
  'was',
  'visible',
  'and',
  'continued',
  'to',
  'watch',
  'it',
  'until',
  'it',
  'became',
  'visible'],
 ['the',
  'two',
  'cusps',
  'appeared',
  'tapering',
  'in',
  'a',
  'very',
  'sharp',
  'faint',
  'prolongation',
  'each',
  'exhibiting',
  'its',
  'farthest',
  'extremity',
  'faintly',
  'illuminated',
  'by',
  'the',
  'solar',
  'rays',
  'before',
  'any',
  'part',
  'of',
  'the',
  'dark',
  'hemisphere',
  'was',
  'visible'],
 ['soon',
  'afterward',
  'the',
  'whole',
  'dark',
  'limb',
  'became',
  'illuminated'],
 ['this',
  'prolongation',
  'of',
  'the',
  'cusps',
  'beyond',
  'the',
  'semicircle',
  'i',
  'thought',
  'must',
  'have',
  'arisen',
  'from',
  'the',
  'refraction',
  'of',
  'the',
  'sun',
  's',
  'rays',
  'by',
  'the',
  'moon',
  's',
  'atmosphere'],
 ['i',
  'computed',
  'also',
  'the',
  'height',
  'of',
  'the',
  'atmosphere',
  'which',
  'could',
  'refract',
  'light',
  'enough',
  'into',
  'its',
  'dark',
  'hemisphere',
  'to',
  'produce',
  'a',
  'twilight',
  'more',
  'luminous',
  'than',
  'the',
  'light',
  'reflected',
  'from',
  'the',
  'earth',
  'when',
  'the',
  'moon',
  'is',
  'about',
  '32',
  'degrees',
  'from',
  'the',
  'new',
  'to',
  'be',
  '1356',
  'feet',
  'in',
  'this',
  'view',
  'i',
  'supposed',
  'the',
  'greatest',
  'height',
  'capable',
  'of',
  'refracting',
  'the',
  'solar',
  'ray',
  'to',
  'be',
  '5376',
  'feet'],
 ['my',
  'ideas',
  'on',
  'this',
  'topic',
  'had',
  'also',
  'received',
  'confirmation',
  'by',
  'a',
  'passage',
  'in',
  'the',
  'eightysecond',
  'volume',
  'of',
  'the',
  'in',
  'which',
  'it',
  'is',
  'stated',
  'that',
  'at',
  'an',
  'occultation',
  'of',
  's',
  'satellites',
  'the',
  'third',
  'disappeared',
  'after',
  'having',
  'been',
  'about',
  '1',
  'or',
  '2',
  'of',
  'time',
  'indistinct',
  'and',
  'the',
  'fourth',
  'became',
  'indiscernible',
  'near',
  'the',
  'limb'],
 ['4'],
 ['frequently',
  'observed',
  'and',
  'the',
  'fixed',
  'stars',
  'when',
  'approaching',
  'the',
  'moon',
  'to',
  'occultation',
  'to',
  'have',
  'their',
  'circular',
  'figure',
  'changed',
  'into',
  'an',
  'oval',
  'one',
  'and',
  'in',
  'other',
  'occultations',
  'he',
  'found',
  'no',
  'alteration',
  'of',
  'figure',
  'at',
  'all'],
 ['it',
  'might',
  'be',
  'supposed',
  'that',
  'at',
  'some',
  'times',
  'and',
  'not',
  'at',
  'others',
  'there',
  'is',
  'a',
  'dense',
  'matter',
  'encompassing',
  'the',
  'moon',
  'wherein',
  'the',
  'rays',
  'of',
  'the',
  'stars',
  'are',
  'refracted'],
 ['upon',
  'the',
  'resistance',
  'or',
  'more',
  'properly',
  'upon',
  'the',
  'support',
  'of',
  'an',
  'atmosphere',
  'existing',
  'in',
  'the',
  'state',
  'of',
  'density',
  'imagined',
  'i',
  'had',
  'of',
  'course',
  'entirely',
  'depended',
  'for',
  'the',
  'safety',
  'of',
  'my',
  'ultimate',
  'descent'],
 ['should',
  'i',
  'then',
  'after',
  'all',
  'prove',
  'to',
  'have',
  'been',
  'mistaken',
  'i',
  'had',
  'in',
  'consequence',
  'nothing',
  'better',
  'to',
  'expect',
  'as',
  'a',
  'finale',
  'to',
  'my',
  'adventure',
  'than',
  'being',
  'dashed',
  'into',
  'atoms',
  'against',
  'the',
  'rugged',
  'surface',
  'of',
  'the',
  'satellite'],
 ['and',
  'indeed',
  'i',
  'had',
  'now',
  'every',
  'reason',
  'to',
  'be',
  'terrified'],
 ['my',
  'distance',
  'from',
  'the',
  'moon',
  'was',
  'comparatively',
  'trifling',
  'while',
  'the',
  'labor',
  'required',
  'by',
  'the',
  'condenser',
  'was',
  'diminished',
  'not',
  'at',
  'all',
  'and',
  'i',
  'could',
  'discover',
  'no',
  'indication',
  'whatever',
  'of',
  'a',
  'decreasing',
  'rarity',
  'in',
  'the',
  'air'],
 ['19th'],
 ['this',
  'morning',
  'to',
  'my',
  'great',
  'joy',
  'about',
  'nine',
  'o',
  'clock',
  'the',
  'surface',
  'of',
  'the',
  'moon',
  'being',
  'frightfully',
  'near',
  'and',
  'my',
  'apprehensions',
  'excited',
  'to',
  'the',
  'utmost',
  'the',
  'pump',
  'of',
  'my',
  'condenser',
  'at',
  'length',
  'gave',
  'evident',
  'tokens',
  'of',
  'an',
  'alteration',
  'in',
  'the',
  'atmosphere'],
 ['by',
  'ten',
  'i',
  'had',
  'reason',
  'to',
  'believe',
  'its',
  'density',
  'considerably',
  'increased'],
 ['by',
  'eleven',
  'very',
  'little',
  'labor',
  'was',
  'necessary',
  'at',
  'the',
  'apparatus',
  'and',
  'at',
  'twelve',
  'o',
  'clock',
  'with',
  'some',
  'hesitation',
  'i',
  'ventured',
  'to',
  'unscrew',
  'the',
  'tourniquet',
  'when',
  'finding',
  'no',
  'inconvenience',
  'from',
  'having',
  'done',
  'so',
  'i',
  'finally',
  'threw',
  'open',
  'the',
  'gumelastic',
  'chamber',
  'and',
  'unrigged',
  'it',
  'from',
  'around',
  'the',
  'car'],
 ['as',
  'might',
  'have',
  'been',
  'expected',
  'spasms',
  'and',
  'violent',
  'headache',
  'were',
  'the',
  'immediate',
  'consequences',
  'of',
  'an',
  'experiment',
  'so',
  'precipitate',
  'and',
  'full',
  'of',
  'danger'],
 ['but',
  'these',
  'and',
  'other',
  'difficulties',
  'attending',
  'respiration',
  'as',
  'they',
  'were',
  'by',
  'no',
  'means',
  'so',
  'great',
  'as',
  'to',
  'put',
  'me',
  'in',
  'peril',
  'of',
  'my',
  'life',
  'i',
  'determined',
  'to',
  'endure',
  'as',
  'i',
  'best',
  'could',
  'in',
  'consideration',
  'of',
  'my',
  'leaving',
  'them',
  'behind',
  'me',
  'momently',
  'in',
  'my',
  'approach',
  'to',
  'the',
  'denser',
  'strata',
  'near',
  'the',
  'moon'],
 ['this',
  'approach',
  'however',
  'was',
  'still',
  'impetuous',
  'in',
  'the',
  'extreme',
  'and',
  'it',
  'soon',
  'became',
  'alarmingly',
  'certain',
  'that',
  'although',
  'i',
  'had',
  'probably',
  'not',
  'been',
  'deceived',
  'in',
  'the',
  'expectation',
  'of',
  'an',
  'atmosphere',
  'dense',
  'in',
  'proportion',
  'to',
  'the',
  'mass',
  'of',
  'the',
  'satellite',
  'still',
  'i',
  'had',
  'been',
  'wrong',
  'in',
  'supposing',
  'this',
  'density',
  'even',
  'at',
  'the',
  'surface',
  'at',
  'all',
  'adequate',
  'to',
  'the',
  'support',
  'of',
  'the',
  'great',
  'weight',
  'contained',
  'in',
  'the',
  'car',
  'of',
  'my',
  'balloon'],
 ['yet',
  'this',
  'should',
  'have',
  'been',
  'the',
  'case',
  'and',
  'in',
  'an',
  'equal',
  'degree',
  'as',
  'at',
  'the',
  'surface',
  'of',
  'the',
  'earth',
  'the',
  'actual',
  'gravity',
  'of',
  'bodies',
  'at',
  'either',
  'planet',
  'supposed',
  'in',
  'the',
  'ratio',
  'of',
  'the',
  'atmospheric',
  'condensation'],
 ['that',
  'it',
  'was',
  'not',
  'the',
  'case',
  'however',
  'my',
  'precipitous',
  'downfall',
  'gave',
  'testimony',
  'enough',
  'why',
  'it',
  'was',
  'not',
  'so',
  'can',
  'only',
  'be',
  'explained',
  'by',
  'a',
  'reference',
  'to',
  'those',
  'possible',
  'geological',
  'disturbances',
  'to',
  'which',
  'i',
  'have',
  'formerly',
  'alluded'],
 ['at',
  'all',
  'events',
  'i',
  'was',
  'now',
  'close',
  'upon',
  'the',
  'planet',
  'and',
  'coming',
  'down',
  'with',
  'the',
  'most',
  'terrible',
  'impetuosity'],
 ['i',
  'lost',
  'not',
  'a',
  'moment',
  'accordingly',
  'in',
  'throwing',
  'overboard',
  'first',
  'my',
  'ballast',
  'then',
  'my',
  'waterkegs',
  'then',
  'my',
  'condensing',
  'apparatus',
  'and',
  'gumelastic',
  'chamber',
  'and',
  'finally',
  'every',
  'article',
  'within',
  'the',
  'car'],
 ['but', 'it', 'was', 'all', 'to', 'no', 'purpose'],
 ['i',
  'still',
  'fell',
  'with',
  'horrible',
  'rapidity',
  'and',
  'was',
  'now',
  'not',
  'more',
  'than',
  'half',
  'a',
  'mile',
  'from',
  'the',
  'surface'],
 ['as',
  'a',
  'last',
  'resource',
  'therefore',
  'having',
  'got',
  'rid',
  'of',
  'my',
  'coat',
  'hat',
  'and',
  'boots',
  'i',
  'cut',
  'loose',
  'from',
  'the',
  'balloon',
  'the',
  'car',
  'itself',
  'which',
  'was',
  'of',
  'no',
  'inconsiderable',
  'weight',
  'and',
  'thus',
  'clinging',
  'with',
  'both',
  'hands',
  'to',
  'the',
  'network',
  'i',
  'had',
  'barely',
  'time',
  'to',
  'observe',
  'that',
  'the',
  'whole',
  'country',
  'as',
  'far',
  'as',
  'the',
  'eye',
  'could',
  'reach',
  'was',
  'thickly',
  'interspersed',
  'with',
  'diminutive',
  'habitations',
  'ere',
  'i',
  'tumbled',
  'headlong',
  'into',
  'the',
  'very',
  'heart',
  'of',
  'a',
  'fantasticallooking',
  'city',
  'and',
  'into',
  'the',
  'middle',
  'of',
  'a',
  'vast',
  'crowd',
  'of',
  'ugly',
  'little',
  'people',
  'who',
  'none',
  'of',
  'them',
  'uttered',
  'a',
  'single',
  'syllable',
  'or',
  'gave',
  'themselves',
  'the',
  'least',
  'trouble',
  'to',
  'render',
  'me',
  'assistance',
  'but',
  'stood',
  'like',
  'a',
  'parcel',
  'of',
  'idiots',
  'grinning',
  'in',
  'a',
  'ludicrous',
  'manner',
  'and',
  'eyeing',
  'me',
  'and',
  'my',
  'balloon',
  'askant',
  'with',
  'their',
  'arms',
  'set',
  'akimbo'],
 ['i',
  'turned',
  'from',
  'them',
  'in',
  'contempt',
  'and',
  'gazing',
  'upward',
  'at',
  'the',
  'earth',
  'so',
  'lately',
  'left',
  'and',
  'left',
  'perhaps',
  'for',
  'ever',
  'beheld',
  'it',
  'like',
  'a',
  'huge',
  'dull',
  'copper',
  'shield',
  'about',
  'two',
  'degrees',
  'in',
  'diameter',
  'fixed',
  'immovably',
  'in',
  'the',
  'heavens',
  'overhead',
  'and',
  'tipped',
  'on',
  'one',
  'of',
  'its',
  'edges',
  'with',
  'a',
  'crescent',
  'border',
  'of',
  'the',
  'most',
  'brilliant',
  'gold'],
 ['no',
  'traces',
  'of',
  'land',
  'or',
  'water',
  'could',
  'be',
  'discovered',
  'and',
  'the',
  'whole',
  'was',
  'clouded',
  'with',
  'variable',
  'spots',
  'and',
  'belted',
  'with',
  'tropical',
  'and',
  'equatorial',
  'zones'],
 ['may',
  'it',
  'please',
  'your',
  'excellencies',
  'after',
  'a',
  'series',
  'of',
  'great',
  'anxieties',
  'unheard',
  'of',
  'dangers',
  'and',
  'unparalleled',
  'escapes',
  'i',
  'had',
  'at',
  'length',
  'on',
  'the',
  'nineteenth',
  'day',
  'of',
  'my',
  'departure',
  'from',
  'arrived',
  'in',
  'safety',
  'at',
  'the',
  'conclusion',
  'of',
  'a',
  'voyage',
  'undoubtedly',
  'the',
  'most',
  'extraordinary',
  'and',
  'the',
  'most',
  'momentous',
  'ever',
  'accomplished',
  'undertaken',
  'or',
  'conceived',
  'by',
  'any',
  'denizen',
  'of',
  'earth'],
 ['but', 'my', 'adventures', 'yet', 'remain', 'to', 'be', 'related'],
 ['and',
  'indeed',
  'your',
  'excellencies',
  'may',
  'well',
  'imagine',
  'that',
  'after',
  'a',
  'residence',
  'of',
  'five',
  'years',
  'upon',
  'a',
  'planet',
  'not',
  'only',
  'deeply',
  'interesting',
  'in',
  'its',
  'own',
  'peculiar',
  'character',
  'but',
  'rendered',
  'doubly',
  'so',
  'by',
  'its',
  'intimate',
  'connection',
  'in',
  'capacity',
  'of',
  'satellite',
  'with',
  'the',
  'world',
  'inhabited',
  'by',
  'man',
  'i',
  'may',
  'have',
  'intelligence',
  'for',
  'the',
  'private',
  'ear',
  'of',
  'the',
  'of',
  'of',
  'far',
  'more',
  'importance',
  'than',
  'the',
  'details',
  'however',
  'wonderful',
  'of',
  'the',
  'mere',
  'voyage',
  'which',
  'so',
  'happily',
  'concluded'],
 ['this', 'is', 'in', 'fact', 'the', 'case'],
 ['i',
  'have',
  'much',
  'very',
  'much',
  'which',
  'it',
  'would',
  'give',
  'me',
  'the',
  'greatest',
  'pleasure',
  'to',
  'communicate'],
 ['i',
  'have',
  'much',
  'to',
  'say',
  'of',
  'the',
  'climate',
  'of',
  'the',
  'planet',
  'of',
  'its',
  'wonderful',
  'alternations',
  'of',
  'heat',
  'and',
  'cold',
  'of',
  'unmitigated',
  'and',
  'burning',
  'sunshine',
  'for',
  'one',
  'fortnight',
  'and',
  'more',
  'than',
  'polar',
  'frigidity',
  'for',
  'the',
  'next',
  'of',
  'a',
  'constant',
  'transfer',
  'of',
  'moisture',
  'by',
  'distillation',
  'like',
  'that',
  'in',
  'vacuo',
  'from',
  'the',
  'point',
  'beneath',
  'the',
  'sun',
  'to',
  'the',
  'point',
  'the',
  'farthest',
  'from',
  'it',
  'of',
  'a',
  'variable',
  'zone',
  'of',
  'running',
  'water',
  'of',
  'the',
  'people',
  'themselves',
  'of',
  'their',
  'manners',
  'customs',
  'and',
  'political',
  'institutions',
  'of',
  'their',
  'peculiar',
  'physical',
  'construction',
  'of',
  'their',
  'ugliness',
  'of',
  'their',
  'want',
  'of',
  'ears',
  'those',
  'useless',
  'appendages',
  'in',
  'an',
  'atmosphere',
  'so',
  'peculiarly',
  'modified',
  'of',
  'their',
  'consequent',
  'ignorance',
  'of',
  'the',
  'use',
  'and',
  'properties',
  'of',
  'speech',
  'of',
  'their',
  'substitute',
  'for',
  'speech',
  'in',
  'a',
  'singular',
  'method',
  'of',
  'intercommunication',
  'of',
  'the',
  'incomprehensible',
  'connection',
  'between',
  'each',
  'particular',
  'individual',
  'in',
  'the',
  'moon',
  'with',
  'some',
  'particular',
  'individual',
  'on',
  'the',
  'earth',
  'a',
  'connection',
  'analogous',
  'with',
  'and',
  'depending',
  'upon',
  'that',
  'of',
  'the',
  'orbs',
  'of',
  'the',
  'planet',
  'and',
  'the',
  'satellites',
  'and',
  'by',
  'means',
  'of',
  'which',
  'the',
  'lives',
  'and',
  'destinies',
  'of',
  'the',
  'inhabitants',
  'of',
  'the',
  'one',
  'are',
  'interwoven',
  'with',
  'the',
  'lives',
  'and',
  'destinies',
  'of',
  'the',
  'inhabitants',
  'of',
  'the',
  'other',
  'and',
  'above',
  'all',
  'if',
  'it',
  'so',
  'please',
  'your',
  'excellencies',
  'above',
  'all',
  'of',
  'those',
  'dark',
  'and',
  'hideous',
  'mysteries',
  'which',
  'lie',
  'in',
  'the',
  'outer',
  'regions',
  'of',
  'the',
  'moon',
  'regions',
  'which',
  'owing',
  'to',
  'the',
  'almost',
  'miraculous',
  'accordance',
  'of',
  'the',
  'satellite',
  's',
  'rotation',
  'on',
  'its',
  'own',
  'axis',
  'with',
  'its',
  'sidereal',
  'revolution',
  'about',
  'the',
  'earth',
  'have',
  'never',
  'yet',
  'been',
  'turned',
  'and',
  'by',
  's',
  'mercy',
  'never',
  'shall',
  'be',
  'turned',
  'to',
  'the',
  'scrutiny',
  'of',
  'the',
  'telescopes',
  'of',
  'man'],
 ['all',
  'this',
  'and',
  'more',
  'much',
  'more',
  'would',
  'i',
  'most',
  'willingly',
  'detail'],
 ['but', 'to', 'be', 'brief', 'i', 'must', 'have', 'my', 'reward'],
 ['i',
  'am',
  'pining',
  'for',
  'a',
  'return',
  'to',
  'my',
  'family',
  'and',
  'to',
  'my',
  'home',
  'and',
  'as',
  'the',
  'price',
  'of',
  'any',
  'farther',
  'communication',
  'on',
  'my',
  'part',
  'in',
  'consideration',
  'of',
  'the',
  'light',
  'which',
  'i',
  'have',
  'it',
  'in',
  'my',
  'power',
  'to',
  'throw',
  'upon',
  'many',
  'very',
  'important',
  'branches',
  'of',
  'physical',
  'and',
  'metaphysical',
  'science',
  'i',
  'must',
  'solicit',
  'through',
  'the',
  'influence',
  'of',
  'your',
  'honorable',
  'body',
  'a',
  'pardon',
  'for',
  'the',
  'crime',
  'of',
  'which',
  'i',
  'have',
  'been',
  'guilty',
  'in',
  'the',
  'death',
  'of',
  'the',
  'creditors',
  'upon',
  'my',
  'departure',
  'from'],
 ['this', 'then', 'is', 'the', 'object', 'of', 'the', 'present', 'paper'],
 ['its',
  'bearer',
  'an',
  'inhabitant',
  'of',
  'the',
  'moon',
  'whom',
  'i',
  'have',
  'prevailed',
  'upon',
  'and',
  'properly',
  'instructed',
  'to',
  'be',
  'my',
  'messenger',
  'to',
  'the',
  'earth',
  'will',
  'await',
  'your',
  'excellencies',
  'pleasure',
  'and',
  'return',
  'to',
  'me',
  'with',
  'the',
  'pardon',
  'in',
  'question',
  'if',
  'it',
  'can',
  'in',
  'any',
  'manner',
  'be',
  'obtained'],
 ['i',
  'have',
  'the',
  'honor',
  'to',
  'be',
  'etc',
  'your',
  'excellencies',
  'very',
  'humble',
  'servant'],
 ['upon',
  'finishing',
  'the',
  'perusal',
  'of',
  'this',
  'very',
  'extraordinary',
  'document',
  'it',
  'is',
  'said',
  'dropped',
  'his',
  'pipe',
  'upon',
  'the',
  'ground',
  'in',
  'the',
  'extremity',
  'of',
  'his',
  'surprise',
  'and',
  'having',
  'taken',
  'off',
  'his',
  'spectacles',
  'wiped',
  'them',
  'and',
  'deposited',
  'them',
  'in',
  'his',
  'pocket',
  'so',
  'far',
  'forgot',
  'both',
  'himself',
  'and',
  'his',
  'dignity',
  'as',
  'to',
  'turn',
  'round',
  'three',
  'times',
  'upon',
  'his',
  'heel',
  'in',
  'the',
  'quintessence',
  'of',
  'astonishment',
  'and',
  'admiration'],
 ['there',
  'was',
  'no',
  'doubt',
  'about',
  'the',
  'matter',
  'the',
  'pardon',
  'should',
  'be',
  'obtained'],
 ['so',
  'at',
  'least',
  'swore',
  'with',
  'a',
  'round',
  'oath',
  'and',
  'so',
  'finally',
  'thought',
  'the',
  'illustrious',
  'as',
  'he',
  'took',
  'the',
  'arm',
  'of',
  'his',
  'brother',
  'in',
  'science',
  'and',
  'without',
  'saying',
  'a',
  'word',
  'began',
  'to',
  'make',
  'the',
  'best',
  'of',
  'his',
  'way',
  'home',
  'to',
  'deliberate',
  'upon',
  'the',
  'measures',
  'to',
  'be',
  'adopted'],
 ['having',
  'reached',
  'the',
  'door',
  'however',
  'of',
  'the',
  'burgomaster',
  's',
  'dwelling',
  'the',
  'professor',
  'ventured',
  'to',
  'suggest',
  'that',
  'as',
  'the',
  'messenger',
  'had',
  'thought',
  'proper',
  'to',
  'disappear',
  'no',
  'doubt',
  'frightened',
  'to',
  'death',
  'by',
  'the',
  'savage',
  'appearance',
  'of',
  'the',
  'burghers',
  'of',
  'the',
  'pardon',
  'would',
  'be',
  'of',
  'little',
  'use',
  'as',
  'no',
  'one',
  'but',
  'a',
  'man',
  'of',
  'the',
  'moon',
  'would',
  'undertake',
  'a',
  'voyage',
  'to',
  'so',
  'vast',
  'a',
  'distance'],
 ['to',
  'the',
  'truth',
  'of',
  'this',
  'observation',
  'the',
  'burgomaster',
  'assented',
  'and',
  'the',
  'matter',
  'was',
  'therefore',
  'at',
  'an',
  'end'],
 ['not', 'so', 'however', 'rumors', 'and', 'speculations'],
 ['the',
  'letter',
  'having',
  'been',
  'published',
  'gave',
  'rise',
  'to',
  'a',
  'variety',
  'of',
  'gossip',
  'and',
  'opinion'],
 ['some',
  'of',
  'the',
  'overwise',
  'even',
  'made',
  'themselves',
  'ridiculous',
  'by',
  'decrying',
  'the',
  'whole',
  'business',
  'as',
  'nothing',
  'better',
  'than',
  'a',
  'hoax'],
 ['but',
  'hoax',
  'with',
  'these',
  'sort',
  'of',
  'people',
  'is',
  'i',
  'believe',
  'a',
  'general',
  'term',
  'for',
  'all',
  'matters',
  'above',
  'their',
  'comprehension'],
 ['for',
  'my',
  'part',
  'i',
  'can',
  'not',
  'conceive',
  'upon',
  'what',
  'data',
  'they',
  'have',
  'founded',
  'such',
  'an',
  'accusation'],
 ['let', 'us', 'see', 'what', 'they', 'say'],
 ['that',
  'certain',
  'wags',
  'in',
  'have',
  'certain',
  'especial',
  'antipathies',
  'to',
  'certain',
  'burgomasters',
  'and',
  'astronomers'],
 ['t', 'understand', 'at', 'all'],
 ['secondly'],
 ['that',
  'an',
  'odd',
  'little',
  'dwarf',
  'and',
  'bottle',
  'conjurer',
  'both',
  'of',
  'whose',
  'ears',
  'for',
  'some',
  'misdemeanor',
  'have',
  'been',
  'cut',
  'off',
  'close',
  'to',
  'his',
  'head',
  'has',
  'been',
  'missing',
  'for',
  'several',
  'days',
  'from',
  'the',
  'neighboring',
  'city',
  'of'],
 ['well', 'what', 'of', 'that'],
 ['thirdly'],
 ['that',
  'the',
  'newspapers',
  'which',
  'were',
  'stuck',
  'all',
  'over',
  'the',
  'little',
  'balloon',
  'were',
  'newspapers',
  'of',
  'and',
  'therefore',
  'could',
  'not',
  'have',
  'been',
  'made',
  'in',
  'the',
  'moon'],
 ['they',
  'were',
  'dirty',
  'papers',
  'very',
  'dirty',
  'and',
  'the',
  'printer',
  'would',
  'take',
  'his',
  'bible',
  'oath',
  'to',
  'their',
  'having',
  'been',
  'printed',
  'in'],
 ['he', 'was', 'mistaken', 'undoubtedly', 'mistaken'],
 ['fourthly',
  'that',
  'hans',
  'himself',
  'the',
  'drunken',
  'villain',
  'and',
  'the',
  'three',
  'very',
  'idle',
  'gentlemen',
  'styled',
  'his',
  'creditors',
  'were',
  'all',
  'seen',
  'no',
  'longer',
  'than',
  'two',
  'or',
  'three',
  'days',
  'ago',
  'in',
  'a',
  'tippling',
  'house',
  'in',
  'the',
  'suburbs',
  'having',
  'just',
  'returned',
  'with',
  'money',
  'in',
  'their',
  'pockets',
  'from',
  'a',
  'trip',
  'beyond',
  'the',
  'sea'],
 ['t', 'believe', 'it', 'don', 't', 'believe', 'a', 'word', 'of', 'it'],
 ['lastly'],
 ['that',
  'it',
  'is',
  'an',
  'opinion',
  'very',
  'generally',
  'received',
  'or',
  'which',
  'ought',
  'to',
  'be',
  'generally',
  'received',
  'that',
  'the',
  'of',
  'in',
  'the',
  'city',
  'of',
  'as',
  'well',
  'as',
  'other',
  'colleges',
  'in',
  'all',
  'other',
  'parts',
  'of',
  'the',
  'world',
  'not',
  'to',
  'mention',
  'colleges',
  'and',
  'astronomers',
  'in',
  'general',
  'are',
  'to',
  'say',
  'the',
  'least',
  'of',
  'the',
  'matter',
  'not',
  'a',
  'whit',
  'better',
  'nor',
  'greater',
  'nor',
  'wiser',
  'than',
  'they',
  'ought',
  'to',
  'be'],
 ['end', 'of'],
 ['notes', 'to'],
 ['1',
  'strictly',
  'speaking',
  'there',
  'is',
  'but',
  'little',
  'similarity',
  'between',
  'the',
  'above',
  'sketchy',
  'trifle',
  'and',
  'the',
  'celebrated',
  'moonstory',
  'of',
  'but',
  'as',
  'both',
  'have',
  'the',
  'character',
  'of',
  'hoaxes',
  'although',
  'the',
  'one',
  'is',
  'in',
  'a',
  'tone',
  'of',
  'banter',
  'the',
  'other',
  'of',
  'downright',
  'earnest',
  'and',
  'as',
  'both',
  'hoaxes',
  'are',
  'on',
  'the',
  'same',
  'subject',
  'the',
  'moon',
  'moreover',
  'as',
  'both',
  'attempt',
  'to',
  'give',
  'plausibility',
  'by',
  'scientific',
  'detail',
  'the',
  'author',
  'of',
  'thinks',
  'it',
  'necessary',
  'to',
  'say',
  'in',
  'selfdefence',
  'that',
  'his',
  'own',
  'd',
  'esprit',
  'was',
  'published',
  'in',
  'the',
  'about',
  'three',
  'weeks',
  'before',
  'the',
  'commencement',
  'of',
  's',
  'in',
  'the',
  'fancying',
  'a',
  'likeness',
  'which',
  'perhaps',
  'does',
  'not',
  'exist',
  'some',
  'of',
  'the',
  'papers',
  'copied',
  'and',
  'collated',
  'it',
  'with',
  'the',
  'by',
  'way',
  'of',
  'detecting',
  'the',
  'writer',
  'of',
  'the',
  'one',
  'in',
  'the',
  'writer',
  'of',
  'the',
  'other'],
 ['as',
  'many',
  'more',
  'persons',
  'were',
  'actually',
  'gulled',
  'by',
  'the',
  'than',
  'would',
  'be',
  'willing',
  'to',
  'acknowledge',
  'the',
  'fact',
  'it',
  'may',
  'here',
  'afford',
  'some',
  'little',
  'amusement',
  'to',
  'show',
  'why',
  'no',
  'one',
  'should',
  'have',
  'been',
  'deceivedto',
  'point',
  'out',
  'those',
  'particulars',
  'of',
  'the',
  'story',
  'which',
  'should',
  'have',
  'been',
  'sufficient',
  'to',
  'establish',
  'its',
  'real',
  'character'],
 ['indeed',
  'however',
  'rich',
  'the',
  'imagination',
  'displayed',
  'in',
  'this',
  'ingenious',
  'fiction',
  'it',
  'wanted',
  'much',
  'of',
  'the',
  'force',
  'which',
  'might',
  'have',
  'been',
  'given',
  'it',
  'by',
  'a',
  'more',
  'scrupulous',
  'attention',
  'to',
  'facts',
  'and',
  'to',
  'general',
  'analogy'],
 ['that',
  'the',
  'public',
  'were',
  'misled',
  'even',
  'for',
  'an',
  'instant',
  'merely',
  'proves',
  'the',
  'gross',
  'ignorance',
  'which',
  'is',
  'so',
  'generally',
  'prevalent',
  'upon',
  'subjects',
  'of',
  'an',
  'astronomical',
  'nature'],
 ['the',
  'moon',
  's',
  'distance',
  'from',
  'the',
  'earth',
  'is',
  'in',
  'round',
  'numbers',
  '240000',
  'miles'],
 ['if',
  'we',
  'desire',
  'to',
  'ascertain',
  'how',
  'near',
  'apparently',
  'a',
  'lens',
  'would',
  'bring',
  'the',
  'satellite',
  'or',
  'any',
  'distant',
  'object',
  'we',
  'of',
  'course',
  'have',
  'but',
  'to',
  'divide',
  'the',
  'distance',
  'by',
  'the',
  'magnifying',
  'or',
  'more',
  'strictly',
  'by',
  'the',
  'spacepenetrating',
  'power',
  'of',
  'the',
  'glass'],
 ['makes', 'his', 'lens', 'have', 'a', 'power', 'of', '42000', 'times'],
 ['by',
  'this',
  'divide',
  '240000',
  'the',
  'moon',
  's',
  'real',
  'distance',
  'and',
  'we',
  'have',
  'five',
  'miles',
  'and',
  'five',
  'sevenths',
  'as',
  'the',
  'apparent',
  'distance'],
 ['no',
  'animal',
  'at',
  'all',
  'could',
  'be',
  'seen',
  'so',
  'far',
  'much',
  'less',
  'the',
  'minute',
  'points',
  'particularized',
  'in',
  'the',
  'story'],
 ['speaks', 'about', 's', 'perceiving', 'flowers', 'the', 'rheas', 'etc'],
 ['and',
  'even',
  'detecting',
  'the',
  'color',
  'and',
  'the',
  'shape',
  'of',
  'the',
  'eyes',
  'of',
  'small',
  'birds'],
 ['shortly',
  'before',
  'too',
  'he',
  'has',
  'himself',
  'observed',
  'that',
  'the',
  'lens',
  'would',
  'not',
  'render',
  'perceptible',
  'objects',
  'of',
  'less',
  'than',
  'eighteen',
  'inches',
  'in',
  'diameter',
  'but',
  'even',
  'this',
  'as',
  'i',
  'have',
  'said',
  'is',
  'giving',
  'the',
  'glass',
  'by',
  'far',
  'too',
  'great',
  'power'],
 ['it',
  'may',
  'be',
  'observed',
  'in',
  'passing',
  'that',
  'this',
  'prodigious',
  'glass',
  'is',
  'said',
  'to',
  'have',
  'been',
  'molded',
  'at',
  'the',
  'glasshouse',
  'of',
  'and',
  'in',
  'but',
  'and',
  's',
  'establishment',
  'had',
  'ceased',
  'operations',
  'for',
  'many',
  'years',
  'previous',
  'to',
  'the',
  'publication',
  'of',
  'the',
  'hoax'],
 ['on',
  'page',
  '13',
  'pamphlet',
  'edition',
  'speaking',
  'of',
  'a',
  'hairy',
  'veil',
  'over',
  'the',
  'eyes',
  'of',
  'a',
  'species',
  'of',
  'bison',
  'the',
  'author',
  'says',
  'it',
  'immediately',
  'occurred',
  'to',
  'the',
  'acute',
  'mind',
  'of',
  'that',
  'this',
  'was',
  'a',
  'providential',
  'contrivance',
  'to',
  'protect',
  'the',
  'eyes',
  'of',
  'the',
  'animal',
  'from',
  'the',
  'great',
  'extremes',
  'of',
  'light',
  'and',
  'darkness',
  'to',
  'which',
  'all',
  'the',
  'inhabitants',
  'of',
  'our',
  'side',
  'of',
  'the',
  'moon',
  'are',
  'periodically',
  'subjected',
  'but',
  'this',
  'can',
  'not',
  'be',
  'thought',
  'a',
  'very',
  'acute',
  'observation',
  'of',
  'the',
  's'],
 ['the',
  'inhabitants',
  'of',
  'our',
  'side',
  'of',
  'the',
  'moon',
  'have',
  'evidently',
  'no',
  'darkness',
  'at',
  'all',
  'so',
  'there',
  'can',
  'be',
  'nothing',
  'of',
  'the',
  'extremes',
  'mentioned'],
 ['in',
  'the',
  'absence',
  'of',
  'the',
  'sun',
  'they',
  'have',
  'a',
  'light',
  'from',
  'the',
  'earth',
  'equal',
  'to',
  'that',
  'of',
  'thirteen',
  'full',
  'unclouded',
  'moons'],
 ['the',
  'topography',
  'throughout',
  'even',
  'when',
  'professing',
  'to',
  'accord',
  'with',
  's',
  'is',
  'entirely',
  'at',
  'variance',
  'with',
  'that',
  'or',
  'any',
  'other',
  'lunar',
  'chart',
  'and',
  'even',
  'grossly',
  'at',
  'variance',
  'with',
  'itself'],
 ['the',
  'points',
  'of',
  'the',
  'compass',
  'too',
  'are',
  'in',
  'inextricable',
  'confusion',
  'the',
  'writer',
  'appearing',
  'to',
  'be',
  'ignorant',
  'that',
  'on',
  'a',
  'lunar',
  'map',
  'these',
  'are',
  'not',
  'in',
  'accordance',
  'with',
  'terrestrial',
  'points',
  'the',
  'east',
  'being',
  'to',
  'the',
  'left',
  'etc'],
 ['perhaps',
  'by',
  'the',
  'vague',
  'titles',
  'etc',
  'given',
  'to',
  'the',
  'dark',
  'spots',
  'by',
  'former',
  'astronomers',
  'has',
  'entered',
  'into',
  'details',
  'regarding',
  'oceans',
  'and',
  'other',
  'large',
  'bodies',
  'of',
  'water',
  'in',
  'the',
  'moon',
  'whereas',
  'there',
  'is',
  'no',
  'astronomical',
  'point',
  'more',
  'positively',
  'ascertained',
  'than',
  'that',
  'no',
  'such',
  'bodies',
  'exist',
  'there'],
 ['in',
  'examining',
  'the',
  'boundary',
  'between',
  'light',
  'and',
  'darkness',
  'in',
  'the',
  'crescent',
  'or',
  'gibbous',
  'moon',
  'where',
  'this',
  'boundary',
  'crosses',
  'any',
  'of',
  'the',
  'dark',
  'places',
  'the',
  'line',
  'of',
  'division',
  'is',
  'found',
  'to',
  'be',
  'rough',
  'and',
  'jagged',
  'but',
  'were',
  'these',
  'dark',
  'places',
  'liquid',
  'it',
  'would',
  'evidently',
  'be',
  'even'],
 ['the',
  'description',
  'of',
  'the',
  'wings',
  'of',
  'the',
  'manbat',
  'on',
  'page',
  '21',
  'is',
  'but',
  'a',
  'literal',
  'copy',
  'of',
  'account',
  'of',
  'the',
  'wings',
  'of',
  'his',
  'flying',
  'islanders'],
 ['this',
  'simple',
  'fact',
  'should',
  'have',
  'induced',
  'suspicion',
  'at',
  'least',
  'it',
  'might',
  'be',
  'thought'],
 ['on',
  'page',
  '23',
  'we',
  'have',
  'the',
  'following',
  'what',
  'a',
  'prodigious',
  'influence',
  'must',
  'our',
  'thirteen',
  'times',
  'larger',
  'globe',
  'have',
  'exercised',
  'upon',
  'this',
  'satellite',
  'when',
  'an',
  'embryo',
  'in',
  'the',
  'womb',
  'of',
  'time',
  'the',
  'passive',
  'subject',
  'of',
  'chemical',
  'affinity',
  'this',
  'is',
  'very',
  'fine',
  'but',
  'it',
  'should',
  'be',
  'observed',
  'that',
  'no',
  'astronomer',
  'would',
  'have',
  'made',
  'such',
  'remark',
  'especially',
  'to',
  'any',
  'journal',
  'of',
  'for',
  'the',
  'earth',
  'in',
  'the',
  'sense',
  'intended',
  'is',
  'not',
  'only',
  'thirteen',
  'but',
  'fortynine',
  'times',
  'larger',
  'than',
  'the',
  'moon'],
 ['a',
  'similar',
  'objection',
  'applies',
  'to',
  'the',
  'whole',
  'of',
  'the',
  'concluding',
  'pages',
  'where',
  'by',
  'way',
  'of',
  'introduction',
  'to',
  'some',
  'discoveries',
  'in',
  'the',
  'philosophical',
  'correspondent',
  'enters',
  'into',
  'a',
  'minute',
  'schoolboy',
  'account',
  'of',
  'that',
  'planet',
  'this',
  'to',
  'the',
  'journal',
  'of',
  'science'],
 ['but',
  'there',
  'is',
  'one',
  'point',
  'in',
  'particular',
  'which',
  'should',
  'have',
  'betrayed',
  'the',
  'fiction'],
 ['let',
  'us',
  'imagine',
  'the',
  'power',
  'actually',
  'possessed',
  'of',
  'seeing',
  'animals',
  'upon',
  'the',
  'moon',
  's',
  'surface',
  'what',
  'would',
  'first',
  'arrest',
  'the',
  'attention',
  'of',
  'an',
  'observer',
  'from',
  'the',
  'earth'],
 ['certainly',
  'neither',
  'their',
  'shape',
  'size',
  'nor',
  'any',
  'other',
  'such',
  'peculiarity',
  'so',
  'soon',
  'as',
  'their',
  'remarkable',
  'situation'],
 ['they',
  'would',
  'appear',
  'to',
  'be',
  'walking',
  'with',
  'heels',
  'up',
  'and',
  'head',
  'down',
  'in',
  'the',
  'manner',
  'of',
  'flies',
  'on',
  'a',
  'ceiling'],
 ['the',
  'real',
  'observer',
  'would',
  'have',
  'uttered',
  'an',
  'instant',
  'ejaculation',
  'of',
  'surprise',
  'however',
  'prepared',
  'by',
  'previous',
  'knowledge',
  'at',
  'the',
  'singularity',
  'of',
  'their',
  'position',
  'the',
  'fictitious',
  'observer',
  'has',
  'not',
  'even',
  'mentioned',
  'the',
  'subject',
  'but',
  'speaks',
  'of',
  'seeing',
  'the',
  'entire',
  'bodies',
  'of',
  'such',
  'creatures',
  'when',
  'it',
  'is',
  'demonstrable',
  'that',
  'he',
  'could',
  'have',
  'seen',
  'only',
  'the',
  'diameter',
  'of',
  'their',
  'heads'],
 ['it',
  'might',
  'as',
  'well',
  'be',
  'remarked',
  'in',
  'conclusion',
  'that',
  'the',
  'size',
  'and',
  'particularly',
  'the',
  'powers',
  'of',
  'the',
  'manbats',
  'for',
  'example',
  'their',
  'ability',
  'to',
  'fly',
  'in',
  'so',
  'rare',
  'an',
  'atmosphere',
  'if',
  'indeed',
  'the',
  'moon',
  'have',
  'any',
  'with',
  'most',
  'of',
  'the',
  'other',
  'fancies',
  'in',
  'regard',
  'to',
  'animal',
  'and',
  'vegetable',
  'existence',
  'are',
  'at',
  'variance',
  'generally',
  'with',
  'all',
  'analogical',
  'reasoning',
  'on',
  'these',
  'themes',
  'and',
  'that',
  'analogy',
  'here',
  'will',
  'often',
  'amount',
  'to',
  'conclusive',
  'demonstration'],
 ['it',
  'is',
  'perhaps',
  'scarcely',
  'necessary',
  'to',
  'add',
  'that',
  'all',
  'the',
  'suggestions',
  'attributed',
  'to',
  'and',
  'in',
  'the',
  'beginning',
  'of',
  'the',
  'article',
  'about',
  'a',
  'transfusion',
  'of',
  'artificial',
  'light',
  'through',
  'the',
  'focal',
  'object',
  'of',
  'vision',
  'etc',
  'etc',
  'belong',
  'to',
  'that',
  'species',
  'of',
  'figurative',
  'writing',
  'which',
  'comes',
  'most',
  'properly',
  'under',
  'the',
  'denomination',
  'of',
  'rigmarole'],
 ['there',
  'is',
  'a',
  'real',
  'and',
  'very',
  'definite',
  'limit',
  'to',
  'optical',
  'discovery',
  'among',
  'the',
  'stars',
  'a',
  'limit',
  'whose',
  'nature',
  'need',
  'only',
  'be',
  'stated',
  'to',
  'be',
  'understood'],
 ['if',
  'indeed',
  'the',
  'casting',
  'of',
  'large',
  'lenses',
  'were',
  'all',
  'that',
  'is',
  'required',
  'man',
  's',
  'ingenuity',
  'would',
  'ultimately',
  'prove',
  'equal',
  'to',
  'the',
  'task',
  'and',
  'we',
  'might',
  'have',
  'them',
  'of',
  'any',
  'size',
  'demanded'],
 ['but',
  'unhappily',
  'in',
  'proportion',
  'to',
  'the',
  'increase',
  'of',
  'size',
  'in',
  'the',
  'lens',
  'and',
  'consequently',
  'of',
  'spacepenetrating',
  'power',
  'is',
  'the',
  'diminution',
  'of',
  'light',
  'from',
  'the',
  'object',
  'by',
  'diffusion',
  'of',
  'its',
  'rays'],
 ['and',
  'for',
  'this',
  'evil',
  'there',
  'is',
  'no',
  'remedy',
  'within',
  'human',
  'ability',
  'for',
  'an',
  'object',
  'is',
  'seen',
  'by',
  'means',
  'of',
  'that',
  'light',
  'alone',
  'which',
  'proceeds',
  'from',
  'itself',
  'whether',
  'direct',
  'or',
  'reflected'],
 ['thus',
  'the',
  'only',
  'artificial',
  'light',
  'which',
  'could',
  'avail',
  'would',
  'be',
  'some',
  'artificial',
  'light',
  'which',
  'he',
  'should',
  'be',
  'able',
  'to',
  'thrownot',
  'upon',
  'the',
  'focal',
  'object',
  'of',
  'vision',
  'but',
  'upon',
  'the',
  'real',
  'object',
  'to',
  'be',
  'viewedto',
  'wit',
  'upon',
  'the',
  'moon'],
 ['it',
  'has',
  'been',
  'easily',
  'calculated',
  'that',
  'when',
  'the',
  'light',
  'proceeding',
  'from',
  'a',
  'star',
  'becomes',
  'so',
  'diffused',
  'as',
  'to',
  'be',
  'as',
  'weak',
  'as',
  'the',
  'natural',
  'light',
  'proceeding',
  'from',
  'the',
  'whole',
  'of',
  'the',
  'stars',
  'in',
  'a',
  'clear',
  'and',
  'moonless',
  'night',
  'then',
  'the',
  'star',
  'is',
  'no',
  'longer',
  'visible',
  'for',
  'any',
  'practical',
  'purpose'],
 ['the',
  'of',
  's',
  'telescope',
  'lately',
  'constructed',
  'in',
  'has',
  'a',
  'speculum',
  'with',
  'a',
  'reflecting',
  'surface',
  'of',
  '4071',
  'square',
  'inches',
  'the',
  'telescope',
  'having',
  'one',
  'of',
  'only',
  '1811'],
 ['the',
  'metal',
  'of',
  'the',
  'of',
  's',
  'is',
  '6',
  'feet',
  'diameter',
  'it',
  'is',
  '5',
  '12',
  'inches',
  'thick',
  'at',
  'the',
  'edges',
  'and',
  '5',
  'at',
  'the',
  'centre'],
 ['the', 'weight', 'is', '3', 'tons'],
 ['the', 'focal', 'length', 'is', '50', 'feet'],
 ['i',
  'have',
  'lately',
  'read',
  'a',
  'singular',
  'and',
  'somewhat',
  'ingenious',
  'little',
  'book',
  'whose',
  'titlepage',
  'runs',
  'thus',
  'dans',
  'la',
  'lvne',
  'ou',
  'le',
  'fait',
  'au',
  'de',
  'la',
  'nouellement',
  'decouvert',
  'par',
  'autrem',
  't',
  'dit',
  'le',
  'volant'],
 ['en', 'notre', 'langve', 'par'],
 ['chez', 'pres', 'la', 'de'],
 ['chez',
  'au',
  'premier',
  'pilier',
  'de',
  'la',
  'grand',
  'salle',
  'du',
  'proche',
  'les'],
 ['76'],
 ['the',
  'writer',
  'professes',
  'to',
  'have',
  'translated',
  'his',
  'work',
  'from',
  'the',
  'of',
  'one'],
 ['although',
  'there',
  'is',
  'a',
  'terrible',
  'ambiguity',
  'in',
  'the',
  'statement'],
 ['en',
  'ai',
  'eu',
  'says',
  'he',
  'l',
  'original',
  'de',
  'medecin',
  'des',
  'mieux',
  'versez',
  'qui',
  'soient',
  'aujourd',
  'huy',
  'dans',
  'la',
  'cõnoissance',
  'des',
  'et',
  'sur',
  'tout',
  'de',
  'la'],
 ['lui',
  'ai',
  'cette',
  'obligation',
  'entre',
  'les',
  'autres',
  'de',
  'm',
  'auoir',
  'non',
  'seulement',
  'mis',
  'en',
  'main',
  'cc',
  'en',
  'anglois',
  'mais',
  'encore',
  'le',
  'du',
  'gentilhomme',
  'recommandable',
  'pour',
  'sa',
  'vertu',
  'sur',
  'la',
  'version',
  'duquel',
  'j',
  'advoue',
  'que',
  'j',
  'ay',
  'tiré',
  'le',
  'plan',
  'de',
  'la',
  'mienne'],
 ['after',
  'some',
  'irrelevant',
  'adventures',
  'much',
  'in',
  'the',
  'manner',
  'of',
  'and',
  'which',
  'occupy',
  'the',
  'first',
  'thirty',
  'pages',
  'the',
  'author',
  'relates',
  'that',
  'being',
  'ill',
  'during',
  'a',
  'sea',
  'voyage',
  'the',
  'crew',
  'abandoned',
  'him',
  'together',
  'with',
  'a',
  'negro',
  'servant',
  'on',
  'the',
  'island',
  'of'],
 ['to',
  'increase',
  'the',
  'chances',
  'of',
  'obtaining',
  'food',
  'the',
  'two',
  'separate',
  'and',
  'live',
  'as',
  'far',
  'apart',
  'as',
  'possible'],
 ['this',
  'brings',
  'about',
  'a',
  'training',
  'of',
  'birds',
  'to',
  'serve',
  'the',
  'purpose',
  'of',
  'carrierpigeons',
  'between',
  'them'],
 ['by',
  'and',
  'by',
  'these',
  'are',
  'taught',
  'to',
  'carry',
  'parcels',
  'of',
  'some',
  'weightand',
  'this',
  'weight',
  'is',
  'gradually',
  'increased'],
 ['at',
  'length',
  'the',
  'idea',
  'is',
  'entertained',
  'of',
  'uniting',
  'the',
  'force',
  'of',
  'a',
  'great',
  'number',
  'of',
  'the',
  'birds',
  'with',
  'a',
  'view',
  'to',
  'raising',
  'the',
  'author',
  'himself'],
 ['a',
  'machine',
  'is',
  'contrived',
  'for',
  'the',
  'purpose',
  'and',
  'we',
  'have',
  'a',
  'minute',
  'description',
  'of',
  'it',
  'which',
  'is',
  'materially',
  'helped',
  'out',
  'by',
  'a',
  'steel',
  'engraving'],
 ['here',
  'we',
  'perceive',
  'the',
  'with',
  'point',
  'ruffles',
  'and',
  'a',
  'huge',
  'periwig',
  'seated',
  'astride',
  'something',
  'which',
  'resembles',
  'very',
  'closely',
  'a',
  'broomstick',
  'and',
  'borne',
  'aloft',
  'by',
  'a',
  'multitude',
  'of',
  'wild',
  'swans',
  'ganzas',
  'who',
  'had',
  'strings',
  'reaching',
  'from',
  'their',
  'tails',
  'to',
  'the',
  'machine'],
 ['the',
  'main',
  'event',
  'detailed',
  'in',
  'the',
  's',
  'narrative',
  'depends',
  'upon',
  'a',
  'very',
  'important',
  'fact',
  'of',
  'which',
  'the',
  'reader',
  'is',
  'kept',
  'in',
  'ignorance',
  'until',
  'near',
  'the',
  'end',
  'of',
  'the',
  'book'],
 ['the',
  'ganzas',
  'whom',
  'he',
  'had',
  'become',
  'so',
  'familiar',
  'were',
  'not',
  'really',
  'denizens',
  'of',
  'but',
  'of',
  'the',
  'moon'],
 ['thence',
  'it',
  'had',
  'been',
  'their',
  'custom',
  'time',
  'out',
  'of',
  'mind',
  'to',
  'migrate',
  'annually',
  'to',
  'some',
  'portion',
  'of',
  'the',
  'earth'],
 ['in',
  'proper',
  'season',
  'of',
  'course',
  'they',
  'would',
  'return',
  'home',
  'and',
  'the',
  'author',
  'happening',
  'one',
  'day',
  'to',
  'require',
  'their',
  'services',
  'for',
  'a',
  'short',
  'voyage',
  'is',
  'unexpectedly',
  'carried',
  'straight',
  'tip',
  'and',
  'in',
  'a',
  'very',
  'brief',
  'period',
  'arrives',
  'at',
  'the',
  'satellite'],
 ['here',
  'he',
  'finds',
  'among',
  'other',
  'odd',
  'things',
  'that',
  'the',
  'people',
  'enjoy',
  'extreme',
  'happiness',
  'that',
  'they',
  'have',
  'no',
  'law',
  'that',
  'they',
  'die',
  'without',
  'pain',
  'that',
  'they',
  'are',
  'from',
  'ten',
  'to',
  'thirty',
  'feet',
  'in',
  'height',
  'that',
  'they',
  'live',
  'five',
  'thousand',
  'years',
  'that',
  'they',
  'have',
  'an',
  'emperor',
  'called',
  'and',
  'that',
  'they',
  'can',
  'jump',
  'sixty',
  'feet',
  'high',
  'when',
  'being',
  'out',
  'of',
  'the',
  'gravitating',
  'influence',
  'they',
  'fly',
  'about',
  'with',
  'fans'],
 ['i',
  'can',
  'not',
  'forbear',
  'giving',
  'a',
  'specimen',
  'of',
  'the',
  'general',
  'philosophy',
  'of',
  'the',
  'volume'],
 ['i',
  'must',
  'not',
  'forget',
  'here',
  'that',
  'the',
  'stars',
  'appeared',
  'only',
  'on',
  'that',
  'side',
  'of',
  'the',
  'globe',
  'turned',
  'toward',
  'the',
  'moon',
  'and',
  'that',
  'the',
  'closer',
  'they',
  'were',
  'to',
  'it',
  'the',
  'larger',
  'they',
  'seemed'],
 ['i', 'have', 'also', 'me', 'and', 'the', 'earth'],
 ['as',
  'to',
  'the',
  'stars',
  'since',
  'there',
  'was',
  'no',
  'night',
  'where',
  'i',
  'was',
  'they',
  'always',
  'had',
  'the',
  'same',
  'appearance',
  'not',
  'brilliant',
  'as',
  'usual',
  'but',
  'pale',
  'and',
  'very',
  'nearly',
  'like',
  'the',
  'moon',
  'of',
  'a',
  'morning'],
 ['but',
  'few',
  'of',
  'them',
  'were',
  'visible',
  'and',
  'these',
  'ten',
  'times',
  'larger',
  'as',
  'well',
  'as',
  'i',
  'could',
  'judge',
  'than',
  'they',
  'seem',
  'to',
  'the',
  'inhabitants',
  'of',
  'the',
  'earth'],
 ['the',
  'moon',
  'which',
  'wanted',
  'two',
  'days',
  'of',
  'being',
  'full',
  'was',
  'of',
  'a',
  'terrible',
  'bigness'],
 ['i',
  'must',
  'not',
  'forget',
  'here',
  'that',
  'the',
  'stars',
  'appeared',
  'only',
  'on',
  'that',
  'side',
  'of',
  'the',
  'globe',
  'turned',
  'toward',
  'the',
  'moon',
  'and',
  'that',
  'the',
  'closer',
  'they',
  'were',
  'to',
  'it',
  'the',
  'larger',
  'they',
  'seemed'],
 ['i',
  'have',
  'also',
  'to',
  'inform',
  'you',
  'that',
  'whether',
  'it',
  'was',
  'calm',
  'weather',
  'or',
  'stormy',
  'i',
  'found',
  'myself',
  'always',
  'immediately',
  'between',
  'the',
  'moon',
  'and',
  'the',
  'earth',
  'convinced',
  'of',
  'this',
  'for',
  'two',
  'reasonsbecause',
  'my',
  'birds',
  'always',
  'flew',
  'in',
  'a',
  'straight',
  'line',
  'and',
  'because',
  'whenever',
  'we',
  'attempted',
  'to',
  'rest',
  'were',
  'carried',
  'insensibly',
  'around',
  'the',
  'globe',
  'of',
  'the',
  'earth'],
 ['for',
  'i',
  'admit',
  'the',
  'opinion',
  'of',
  'who',
  'maintains',
  'that',
  'it',
  'never',
  'ceases',
  'to',
  'revolve',
  'from',
  'the',
  'east',
  'to',
  'the',
  'west',
  'upon',
  'the',
  'poles',
  'of',
  'the',
  'commonly',
  'called',
  'the',
  'poles',
  'of',
  'the',
  'world',
  'but',
  'upon',
  'those',
  'of',
  'the',
  'a',
  'question',
  'of',
  'which',
  'i',
  'propose',
  'to',
  'speak',
  'more',
  'at',
  'length',
  'hereafter',
  'when',
  'i',
  'shall',
  'have',
  'leisure',
  'to',
  'refresh',
  'my',
  'memory',
  'in',
  'regard',
  'to',
  'the',
  'astrology',
  'which',
  'i',
  'learned',
  'at',
  'when',
  'young',
  'and',
  'have',
  'since',
  'forgotten'],
 ['notwithstanding',
  'the',
  'blunders',
  'italicized',
  'the',
  'book',
  'is',
  'not',
  'without',
  'some',
  'claim',
  'to',
  'attention',
  'as',
  'affording',
  'a',
  'naive',
  'specimen',
  'of',
  'the',
  'current',
  'astronomical',
  'notions',
  'of',
  'the',
  'time'],
 ['one',
  'of',
  'these',
  'assumed',
  'that',
  'the',
  'gravitating',
  'power',
  'extended',
  'but',
  'a',
  'short',
  'distance',
  'from',
  'the',
  'earth',
  's',
  'surface',
  'and',
  'accordingly',
  'we',
  'find',
  'our',
  'voyager',
  'carried',
  'insensibly',
  'around',
  'the',
  'globe',
  'etc'],
 ['there',
  'have',
  'been',
  'other',
  'voyages',
  'to',
  'the',
  'moon',
  'but',
  'none',
  'of',
  'higher',
  'merit',
  'than',
  'the',
  'one',
  'just',
  'mentioned'],
 ['that', 'of', 'is', 'utterly', 'meaningless'],
 ['in',
  'the',
  'third',
  'volume',
  'of',
  'the',
  'american',
  'will',
  'be',
  'found',
  'quite',
  'an',
  'elaborate',
  'criticism',
  'upon',
  'a',
  'certain',
  'journey',
  'of',
  'the',
  'kind',
  'in',
  'question',
  'a',
  'criticism',
  'in',
  'which',
  'it',
  'is',
  'difficult',
  'to',
  'say',
  'whether',
  'the',
  'critic',
  'most',
  'exposes',
  'the',
  'stupidity',
  'of',
  'the',
  'book',
  'or',
  'his',
  'own',
  'absurd',
  'ignorance',
  'of',
  'astronomy'],
 ['i',
  'forget',
  'the',
  'title',
  'of',
  'the',
  'work',
  'but',
  'the',
  'of',
  'the',
  'voyage',
  'are',
  'more',
  'deplorably',
  'ill',
  'conceived',
  'than',
  'are',
  'even',
  'the',
  'ganzas',
  'of',
  'our',
  'friend',
  'the'],
 ['the',
  'adventurer',
  'in',
  'digging',
  'the',
  'earth',
  'happens',
  'to',
  'discover',
  'a',
  'peculiar',
  'metal',
  'for',
  'which',
  'the',
  'moon',
  'has',
  'a',
  'strong',
  'attraction',
  'and',
  'straightway',
  'constructs',
  'of',
  'it',
  'a',
  'box',
  'which',
  'when',
  'cast',
  'loose',
  'from',
  'its',
  'terrestrial',
  'fastenings',
  'flies',
  'with',
  'him',
  'forthwith',
  'to',
  'the',
  'satellite'],
 ['the',
  'of',
  'is',
  'a',
  'jeu',
  'd',
  'esprit',
  'not',
  'altogether',
  'contemptible',
  'and',
  'has',
  'been',
  'translated',
  'into'],
 ['the',
  'hero',
  'was',
  'in',
  'fact',
  'the',
  'gamekeeper',
  'of',
  'an',
  'peer',
  'whose',
  'eccentricities',
  'gave',
  'rise',
  'to',
  'the',
  'tale'],
 ['the',
  'flight',
  'is',
  'made',
  'on',
  'an',
  'eagle',
  's',
  'back',
  'from',
  'a',
  'lofty',
  'mountain',
  'at',
  'the',
  'end',
  'of'],
 ['in',
  'these',
  'various',
  'brochures',
  'the',
  'aim',
  'is',
  'always',
  'satirical',
  'the',
  'theme',
  'being',
  'a',
  'description',
  'of',
  'lunarian',
  'customs',
  'as',
  'compared',
  'with',
  'ours'],
 ['in',
  'none',
  'is',
  'there',
  'any',
  'effort',
  'at',
  'in',
  'the',
  'details',
  'of',
  'the',
  'voyage',
  'itself'],
 ['the',
  'writers',
  'seem',
  'in',
  'each',
  'instance',
  'to',
  'be',
  'utterly',
  'uninformed',
  'in',
  'respect',
  'to',
  'astronomy'],
 ['in',
  'the',
  'design',
  'is',
  'original',
  'inasmuch',
  'as',
  'regards',
  'an',
  'attempt',
  'at',
  'in',
  'the',
  'application',
  'of',
  'scientific',
  'principles',
  'so',
  'far',
  'as',
  'the',
  'whimsical',
  'nature',
  'of',
  'the',
  'subject',
  'would',
  'permit',
  'to',
  'the',
  'actual',
  'passage',
  'between',
  'the',
  'earth',
  'and',
  'the',
  'moon'],
 ['2',
  'the',
  'zodiacal',
  'light',
  'is',
  'probably',
  'what',
  'the',
  'ancients',
  'called'],
 ['emicant', 'quos', 'docos', 'vocant', 'lib'],
 ['2', 'p', '26'],
 ['3',
  'since',
  'the',
  'original',
  'publication',
  'of',
  'i',
  'find',
  'that',
  'of',
  'balloon',
  'notoriety',
  'and',
  'other',
  'late',
  'aeronauts',
  'deny',
  'the',
  'assertions',
  'of',
  'in',
  'this',
  'respect',
  'and',
  'speak',
  'of',
  'a',
  'decreasing',
  'inconvenience',
  'precisely',
  'in',
  'accordance',
  'with',
  'the',
  'theory',
  'here',
  'urged',
  'in',
  'a',
  'mere',
  'spirit',
  'of',
  'banter'],
 ['4',
  'writes',
  'that',
  'he',
  'has',
  'several',
  'times',
  'found',
  'in',
  'skies',
  'perfectly',
  'clear',
  'when',
  'even',
  'stars',
  'of',
  'the',
  'sixth',
  'and',
  'seventh',
  'magnitude',
  'were',
  'conspicuous',
  'that',
  'at',
  'the',
  'same',
  'altitude',
  'of',
  'the',
  'moon',
  'at',
  'the',
  'same',
  'elongation',
  'from',
  'the',
  'earth',
  'and',
  'with',
  'one',
  'and',
  'the',
  'same',
  'excellent',
  'telescope',
  'the',
  'moon',
  'and',
  'its',
  'maculae',
  'did',
  'not',
  'appear',
  'equally',
  'lucid',
  'at',
  'all',
  'times'],
 ['from',
  'the',
  'circumstances',
  'of',
  'the',
  'observation',
  'it',
  'is',
  'evident',
  'that',
  'the',
  'cause',
  'of',
  'this',
  'phenomenon',
  'is',
  'not',
  'either',
  'in',
  'our',
  'air',
  'in',
  'the',
  'tube',
  'in',
  'the',
  'moon',
  'or',
  'in',
  'the',
  'eye',
  'of',
  'the',
  'spectator',
  'but',
  'must',
  'be',
  'looked',
  'for',
  'in',
  'something',
  'an',
  'atmosphere'],
 ['existing', 'about', 'the', 'moon'],
 ['the'],
 ['what', 'ho'],
 ['what', 'ho'],
 ['this', 'fellow', 'is', 'dancing', 'mad'],
 ['he', 'hath', 'been', 'bitten', 'by', 'the'],
 ['all', 'in', 'the'],
 ['many', 'years', 'ago', 'i', 'contracted', 'an', 'intimacy', 'with', 'a'],
 ['he',
  'was',
  'of',
  'an',
  'ancient',
  'family',
  'and',
  'had',
  'once',
  'been',
  'wealthy',
  'but',
  'a',
  'series',
  'of',
  'misfortunes',
  'had',
  'reduced',
  'him',
  'to',
  'want'],
 ['to',
  'avoid',
  'the',
  'mortification',
  'consequent',
  'upon',
  'his',
  'disasters',
  'he',
  'left',
  'the',
  'city',
  'of',
  'his',
  'forefathers',
  'and',
  'took',
  'up',
  'his',
  'residence',
  'at',
  's',
  'near'],
 ['this', 'is', 'a', 'very', 'singular', 'one'],
 ['it',
  'consists',
  'of',
  'little',
  'else',
  'than',
  'the',
  'sea',
  'sand',
  'and',
  'is',
  'about',
  'three',
  'miles',
  'long'],
 ['its',
  'breadth',
  'at',
  'no',
  'point',
  'exceeds',
  'a',
  'quarter',
  'of',
  'a',
  'mile'],
 ['it',
  'is',
  'separated',
  'from',
  'the',
  'main',
  'land',
  'by',
  'a',
  'scarcely',
  'perceptible',
  'creek',
  'oozing',
  'its',
  'way',
  'through',
  'a',
  'wilderness',
  'of',
  'reeds',
  'and',
  'slime',
  'a',
  'favorite',
  'resort',
  'of',
  'the',
  'marsh',
  'hen'],
 ['the',
  'vegetation',
  'as',
  'might',
  'be',
  'supposed',
  'is',
  'scant',
  'or',
  'at',
  'least',
  'dwarfish'],
 ['no', 'trees', 'of', 'any', 'magnitude', 'are', 'to', 'be', 'seen'],
 ['near',
  'the',
  'western',
  'extremity',
  'where',
  'stands',
  'and',
  'where',
  'are',
  'some',
  'miserable',
  'frame',
  'buildings',
  'tenanted',
  'during',
  'summer',
  'by',
  'the',
  'fugitives',
  'from',
  'dust',
  'and',
  'fever',
  'may',
  'be',
  'found',
  'indeed',
  'the',
  'bristly',
  'palmetto',
  'but',
  'the',
  'whole',
  'island',
  'with',
  'the',
  'exception',
  'of',
  'this',
  'western',
  'point',
  'and',
  'a',
  'line',
  'of',
  'hard',
  'white',
  'beach',
  'on',
  'the',
  'seacoast',
  'is',
  'covered',
  'with',
  'a',
  'dense',
  'undergrowth',
  'of',
  'the',
  'sweet',
  'myrtle',
  'so',
  'much',
  'prized',
  'by',
  'the',
  'horticulturists',
  'of'],
 ['the',
  'shrub',
  'here',
  'often',
  'attains',
  'the',
  'height',
  'of',
  'fifteen',
  'or',
  'twenty',
  'feet',
  'and',
  'forms',
  'an',
  'almost',
  'impenetrable',
  'coppice',
  'burthening',
  'the',
  'air',
  'with',
  'its',
  'fragrance'],
 ['in',
  'the',
  'inmost',
  'recesses',
  'of',
  'this',
  'coppice',
  'not',
  'far',
  'from',
  'the',
  'eastern',
  'or',
  'more',
  'remote',
  'end',
  'of',
  'the',
  'island',
  'had',
  'built',
  'himself',
  'a',
  'small',
  'hut',
  'which',
  'he',
  'occupied',
  'when',
  'i',
  'first',
  'by',
  'mere',
  'accident',
  'made',
  'his',
  'acquaintance'],
 ['this',
  'soon',
  'ripened',
  'into',
  'friendship',
  'for',
  'there',
  'was',
  'much',
  'in',
  'the',
  'recluse',
  'to',
  'excite',
  'interest',
  'and',
  'esteem'],
 ['i',
  'found',
  'him',
  'well',
  'educated',
  'with',
  'unusual',
  'powers',
  'of',
  'mind',
  'but',
  'infected',
  'with',
  'misanthropy',
  'and',
  'subject',
  'to',
  'perverse',
  'moods',
  'of',
  'alternate',
  'enthusiasm',
  'and',
  'melancholy'],
 ['he',
  'had',
  'with',
  'him',
  'many',
  'books',
  'but',
  'rarely',
  'employed',
  'them'],
 ['his',
  'chief',
  'amusements',
  'were',
  'gunning',
  'and',
  'fishing',
  'or',
  'sauntering',
  'along',
  'the',
  'beach',
  'and',
  'through',
  'the',
  'myrtles',
  'in',
  'quest',
  'of',
  'shells',
  'or',
  'entomological',
  'specimens',
  'his',
  'collection',
  'of',
  'the',
  'latter',
  'might',
  'have',
  'been',
  'envied',
  'by',
  'a'],
 ['in',
  'these',
  'excursions',
  'he',
  'was',
  'usually',
  'accompanied',
  'by',
  'an',
  'old',
  'negro',
  'called',
  'who',
  'had',
  'been',
  'manumitted',
  'before',
  'the',
  'reverses',
  'of',
  'the',
  'family',
  'but',
  'who',
  'could',
  'be',
  'induced',
  'neither',
  'by',
  'threats',
  'nor',
  'by',
  'promises',
  'to',
  'abandon',
  'what',
  'he',
  'considered',
  'his',
  'right',
  'of',
  'attendance',
  'upon',
  'the',
  'footsteps',
  'of',
  'his',
  'young',
  'it',
  'is',
  'not',
  'improbable',
  'that',
  'the',
  'relatives',
  'of',
  'conceiving',
  'him',
  'to',
  'be',
  'somewhat',
  'unsettled',
  'in',
  'intellect',
  'had',
  'contrived',
  'to',
  'instil',
  'this',
  'obstinacy',
  'into',
  'with',
  'a',
  'view',
  'to',
  'the',
  'supervision',
  'and',
  'guardianship',
  'of',
  'the',
  'wanderer'],
 ['the',
  'winters',
  'in',
  'the',
  'latitude',
  'of',
  's',
  'are',
  'seldom',
  'very',
  'severe',
  'and',
  'in',
  'the',
  'fall',
  'of',
  'the',
  'year',
  'it',
  'is',
  'a',
  'rare',
  'event',
  'indeed',
  'when',
  'a',
  'fire',
  'is',
  'considered',
  'necessary'],
 ['about',
  'the',
  'middle',
  'of',
  '18',
  'there',
  'occurred',
  'however',
  'a',
  'day',
  'of',
  'remarkable',
  'chilliness'],
 ['just',
  'before',
  'sunset',
  'i',
  'scrambled',
  'my',
  'way',
  'through',
  'the',
  'evergreens',
  'to',
  'the',
  'hut',
  'of',
  'my',
  'friend',
  'whom',
  'i',
  'had',
  'not',
  'visited',
  'for',
  'several',
  'weeks',
  'my',
  'residence',
  'being',
  'at',
  'that',
  'time',
  'in',
  'a',
  'distance',
  'of',
  'nine',
  'miles',
  'from',
  'the',
  'while',
  'the',
  'facilities',
  'of',
  'passage',
  'and',
  'repassage',
  'were',
  'very',
  'far',
  'behind',
  'those',
  'of',
  'the',
  'present',
  'day'],
 ['upon',
  'reaching',
  'the',
  'hut',
  'i',
  'rapped',
  'as',
  'was',
  'my',
  'custom',
  'and',
  'getting',
  'no',
  'reply',
  'sought',
  'for',
  'the',
  'key',
  'where',
  'i',
  'knew',
  'it',
  'was',
  'secreted',
  'unlocked',
  'the',
  'door',
  'and',
  'went',
  'in'],
 ['a', 'fine', 'fire', 'was', 'blazing', 'upon', 'the', 'hearth'],
 ['it',
  'was',
  'a',
  'novelty',
  'and',
  'by',
  'no',
  'means',
  'an',
  'ungrateful',
  'one'],
 ['i',
  'threw',
  'off',
  'an',
  'overcoat',
  'took',
  'an',
  'armchair',
  'by',
  'the',
  'crackling',
  'logs',
  'and',
  'awaited',
  'patiently',
  'the',
  'arrival',
  'of',
  'my',
  'hosts'],
 ['soon',
  'after',
  'dark',
  'they',
  'arrived',
  'and',
  'gave',
  'me',
  'a',
  'most',
  'cordial',
  'welcome'],
 ['grinning',
  'from',
  'ear',
  'to',
  'ear',
  'bustled',
  'about',
  'to',
  'prepare',
  'some',
  'marshhens',
  'for',
  'supper'],
 ['was',
  'in',
  'one',
  'of',
  'his',
  'fits',
  'how',
  'else',
  'shall',
  'i',
  'term',
  'them',
  'of',
  'enthusiasm'],
 ['he',
  'had',
  'found',
  'an',
  'unknown',
  'bivalve',
  'forming',
  'a',
  'new',
  'genus',
  'and',
  'more',
  'than',
  'this',
  'he',
  'had',
  'hunted',
  'down',
  'and',
  'secured',
  'with',
  's',
  'assistance',
  'a',
  'scarabæus',
  'which',
  'he',
  'believed',
  'to',
  'be',
  'totally',
  'new',
  'but',
  'in',
  'respect',
  'to',
  'which',
  'he',
  'wished',
  'to',
  'have',
  'my',
  'opinion',
  'on',
  'the',
  'morrow'],
 ['and',
  'why',
  'not',
  'tonight',
  'i',
  'asked',
  'rubbing',
  'my',
  'hands',
  'over',
  'the',
  'blaze',
  'and',
  'wishing',
  'the',
  'whole',
  'tribe',
  'of',
  'scarabæi',
  'at',
  'the',
  'devil'],
 ['if',
  'i',
  'had',
  'only',
  'known',
  'you',
  'were',
  'here',
  'said',
  'but',
  'it',
  's',
  'so',
  'long',
  'since',
  'i',
  'saw',
  'you',
  'and',
  'how',
  'could',
  'i',
  'foresee',
  'that',
  'you',
  'would',
  'pay',
  'me',
  'a',
  'visit',
  'this',
  'very',
  'night',
  'of',
  'all',
  'others'],
 ['as',
  'i',
  'was',
  'coming',
  'home',
  'i',
  'met',
  'from',
  'the',
  'fort',
  'and',
  'very',
  'foolishly',
  'i',
  'lent',
  'him',
  'the',
  'bug',
  'so',
  'it',
  'will',
  'be',
  'impossible',
  'for',
  'you',
  'to',
  'see',
  'it',
  'until',
  'the',
  'morning'],
 ['here',
  'tonight',
  'and',
  'i',
  'will',
  'send',
  'down',
  'for',
  'it',
  'at',
  'sunrise'],
 ['it', 'is', 'the', 'loveliest', 'thing', 'in', 'creation'],
 ['what', 'sunrise'],
 ['nonsense'],
 ['no', 'the', 'bug'],
 ['it',
  'is',
  'of',
  'a',
  'brilliant',
  'gold',
  'color',
  'about',
  'the',
  'size',
  'of',
  'a',
  'large',
  'hickorynut',
  'with',
  'two',
  'jet',
  'black',
  'spots',
  'near',
  'one',
  'extremity',
  'of',
  'the',
  'back',
  'and',
  'another',
  'somewhat',
  'longer',
  'at',
  'the',
  'other'],
 ['the', 'antennæ', 'are'],
 ['aint',
  'no',
  'tin',
  'in',
  'him',
  'i',
  'keep',
  'a',
  'tellin',
  'on',
  'you',
  'here',
  'interrupted',
  'de',
  'bug',
  'is',
  'a',
  'goole',
  'bug',
  'solid',
  'ebery',
  'bit',
  'of',
  'him',
  'inside',
  'and',
  'all',
  'sep',
  'him',
  'wing',
  'neber',
  'feel',
  'half',
  'so',
  'hebby',
  'a',
  'bug',
  'in',
  'my',
  'life'],
 ['suppose',
  'it',
  'is',
  'replied',
  'somewhat',
  'more',
  'earnestly',
  'it',
  'seemed',
  'to',
  'me',
  'than',
  'the',
  'case',
  'demanded',
  'is',
  'that',
  'any',
  'reason',
  'for',
  'your',
  'letting',
  'the',
  'birds',
  'burn'],
 ['the',
  'color',
  'here',
  'he',
  'turned',
  'to',
  'me',
  'is',
  'really',
  'almost',
  'enough',
  'to',
  'warrant',
  's',
  'idea'],
 ['you',
  'never',
  'saw',
  'a',
  'more',
  'brilliant',
  'metallic',
  'lustre',
  'than',
  'the',
  'scales',
  'emit',
  'but',
  'of',
  'this',
  'you',
  'can',
  'not',
  'judge',
  'till',
  'tomorrow'],
 ['in',
  'the',
  'mean',
  'time',
  'i',
  'can',
  'give',
  'you',
  'some',
  'idea',
  'of',
  'the',
  'shape',
  'saying',
  'this',
  'he',
  'seated',
  'himself',
  'at',
  'a',
  'small',
  'table',
  'on',
  'which',
  'were',
  'a',
  'pen',
  'and',
  'ink',
  'but',
  'no',
  'paper'],
 ['he', 'looked', 'for', 'some', 'in', 'a', 'drawer', 'but', 'found', 'none'],
 ['mind',
  'said',
  'he',
  'at',
  'length',
  'this',
  'will',
  'answer',
  'and',
  'he',
  'drew',
  'from',
  'his',
  'waistcoat',
  'pocket',
  'a',
  'scrap',
  'of',
  'what',
  'i',
  'took',
  'to',
  'be',
  'very',
  'dirty',
  'foolscap',
  'and',
  'made',
  'upon',
  'it',
  'a',
  'rough',
  'drawing',
  'with',
  'the',
  'pen'],
 ['while',
  'he',
  'did',
  'this',
  'i',
  'retained',
  'my',
  'seat',
  'by',
  'the',
  'fire',
  'for',
  'i',
  'was',
  'still',
  'chilly'],
 ['when',
  'the',
  'design',
  'was',
  'complete',
  'he',
  'handed',
  'it',
  'to',
  'me',
  'without',
  'rising'],
 ['as',
  'i',
  'received',
  'it',
  'a',
  'loud',
  'growl',
  'was',
  'heard',
  'succeeded',
  'by',
  'a',
  'scratching',
  'at',
  'the',
  'door'],
 ['jupiter',
  'opened',
  'it',
  'and',
  'a',
  'large',
  'belonging',
  'to',
  'rushed',
  'in',
  'leaped',
  'upon',
  'my',
  'shoulders',
  'and',
  'loaded',
  'me',
  'with',
  'caresses',
  'for',
  'i',
  'had',
  'shown',
  'him',
  'much',
  'attention',
  'during',
  'previous',
  'visits'],
 ['when',
  'his',
  'gambols',
  'were',
  'over',
  'i',
  'looked',
  'at',
  'the',
  'paper',
  'and',
  'to',
  'speak',
  'the',
  'truth',
  'found',
  'myself',
  'not',
  'a',
  'little',
  'puzzled',
  'at',
  'what',
  'my',
  'friend',
  'had',
  'depicted'],
 ['well',
  'i',
  'said',
  'after',
  'contemplating',
  'it',
  'for',
  'some',
  'minutes',
  'this',
  'is',
  'a',
  'strange',
  'scarabæus',
  'i',
  'must',
  'confess',
  'new',
  'to',
  'me',
  'never',
  'saw',
  'anything',
  'like',
  'it',
  'before',
  'unless',
  'it',
  'was',
  'a',
  'skull',
  'or',
  'a',
  'death',
  'shead',
  'which',
  'it',
  'more',
  'nearly',
  'resembles',
  'than',
  'anything',
  'else',
  'that',
  'has',
  'come',
  'under',
  'my',
  'observation'],
 ['a',
  'death',
  'shead',
  'echoed',
  'yes',
  'well',
  'it',
  'has',
  'something',
  'of',
  'that',
  'appearance',
  'upon',
  'paper',
  'no',
  'doubt'],
 ['the', 'two', 'upper', 'black', 'spots', 'look', 'like', 'eyes', 'eh'],
 ['and',
  'the',
  'longer',
  'one',
  'at',
  'the',
  'bottom',
  'like',
  'a',
  'mouth',
  'and',
  'then',
  'the',
  'shape',
  'of',
  'the',
  'whole',
  'is',
  'oval'],
 ['perhaps',
  'so',
  'said',
  'i',
  'but',
  'i',
  'fear',
  'you',
  'are',
  'no',
  'artist'],
 ['i',
  'must',
  'wait',
  'until',
  'i',
  'see',
  'the',
  'beetle',
  'itself',
  'if',
  'i',
  'am',
  'to',
  'form',
  'any',
  'idea',
  'of',
  'its',
  'personal',
  'appearance'],
 ['i',
  'don',
  't',
  'know',
  'said',
  'he',
  'a',
  'little',
  'nettled',
  'i',
  'draw',
  'tolerably',
  'should',
  'do',
  'it',
  'at',
  'least',
  'have',
  'had',
  'good',
  'masters',
  'and',
  'flatter',
  'myself',
  'that',
  'i',
  'am',
  'not',
  'quite',
  'a',
  'blockhead'],
 ['but',
  'my',
  'dear',
  'fellow',
  'you',
  'are',
  'joking',
  'then',
  'said',
  'i',
  'this',
  'is',
  'a',
  'very',
  'passable',
  'skull',
  'indeed',
  'i',
  'may',
  'say',
  'that',
  'it',
  'is',
  'a',
  'very',
  'excellent',
  'skull',
  'according',
  'to',
  'the',
  'vulgar',
  'notions',
  'about',
  'such',
  'specimens',
  'of',
  'physiology',
  'and',
  'your',
  'scarabæus',
  'must',
  'be',
  'the',
  'queerest',
  'scarabæus',
  'in',
  'the',
  'world',
  'if',
  'it',
  'resembles',
  'it'],
 ['why',
  'we',
  'may',
  'get',
  'up',
  'a',
  'very',
  'thrilling',
  'bit',
  'of',
  'superstition',
  'upon',
  'this',
  'hint'],
 ['i',
  'presume',
  'you',
  'will',
  'call',
  'the',
  'bug',
  'scarabæus',
  'caput',
  'hominis',
  'or',
  'something',
  'of',
  'that',
  'kind',
  'there',
  'are',
  'many',
  'similar',
  'titles',
  'in',
  'the'],
 ['but', 'where', 'are', 'the', 'antennæ', 'you', 'spoke', 'of'],
 ['the',
  'antennæ',
  'said',
  'who',
  'seemed',
  'to',
  'be',
  'getting',
  'unaccountably',
  'warm',
  'upon',
  'the',
  'subject',
  'i',
  'am',
  'sure',
  'you',
  'must',
  'see',
  'the',
  'antennæ'],
 ['i',
  'made',
  'them',
  'as',
  'distinct',
  'as',
  'they',
  'are',
  'in',
  'the',
  'original',
  'insect',
  'and',
  'i',
  'presume',
  'that',
  'is',
  'sufficient'],
 ['well',
  'well',
  'i',
  'said',
  'perhaps',
  'you',
  'have',
  'still',
  'i',
  'don',
  't',
  'see',
  'them',
  'and',
  'i',
  'handed',
  'him',
  'the',
  'paper',
  'without',
  'additional',
  'remark',
  'not',
  'wishing',
  'to',
  'ruffle',
  'his',
  'temper',
  'but',
  'i',
  'was',
  'much',
  'surprised',
  'at',
  'the',
  'turn',
  'affairs',
  'had',
  'taken',
  'his',
  'ill',
  'humor',
  'puzzled',
  'me',
  'and',
  'as',
  'for',
  'the',
  'drawing',
  'of',
  'the',
  'beetle',
  'there',
  'were',
  'positively',
  'no',
  'antennæ',
  'visible',
  'and',
  'the',
  'whole',
  'did',
  'bear',
  'a',
  'very',
  'close',
  'resemblance',
  'to',
  'the',
  'ordinary',
  'cuts',
  'of',
  'a',
  'death',
  'shead'],
 ['he',
  'received',
  'the',
  'paper',
  'very',
  'peevishly',
  'and',
  'was',
  'about',
  'to',
  'crumple',
  'it',
  'apparently',
  'to',
  'throw',
  'it',
  'in',
  'the',
  'fire',
  'when',
  'a',
  'casual',
  'glance',
  'at',
  'the',
  'design',
  'seemed',
  'suddenly',
  'to',
  'rivet',
  'his',
  'attention'],
 ['in',
  'an',
  'instant',
  'his',
  'face',
  'grew',
  'violently',
  'red',
  'in',
  'another',
  'as',
  'excessively',
  'pale'],
 ['for',
  'some',
  'minutes',
  'he',
  'continued',
  'to',
  'scrutinize',
  'the',
  'drawing',
  'minutely',
  'where',
  'he',
  'sat'],
 ['at',
  'length',
  'he',
  'arose',
  'took',
  'a',
  'candle',
  'from',
  'the',
  'table',
  'and',
  'proceeded',
  'to',
  'seat',
  'himself',
  'upon',
  'a',
  'seachest',
  'in',
  'the',
  'farthest',
  'corner',
  'of',
  'the',
  'room'],
 ['here',
  'again',
  'he',
  'made',
  'an',
  'anxious',
  'examination',
  'of',
  'the',
  'paper',
  'turning',
  'it',
  'in',
  'all',
  'directions'],
 ['he',
  'said',
  'nothing',
  'however',
  'and',
  'his',
  'conduct',
  'greatly',
  'astonished',
  'me',
  'yet',
  'i',
  'thought',
  'it',
  'prudent',
  'not',
  'to',
  'exacerbate',
  'the',
  'growing',
  'moodiness',
  'of',
  'his',
  'temper',
  'by',
  'any',
  'comment'],
 ['presently',
  'he',
  'took',
  'from',
  'his',
  'coat',
  'pocket',
  'a',
  'wallet',
  'placed',
  'the',
  'paper',
  'carefully',
  'in',
  'it',
  'and',
  'deposited',
  'both',
  'in',
  'a',
  'writingdesk',
  'which',
  'he',
  'locked'],
 ['he',
  'now',
  'grew',
  'more',
  'composed',
  'in',
  'his',
  'demeanor',
  'but',
  'his',
  'original',
  'air',
  'of',
  'enthusiasm',
  'had',
  'quite',
  'disappeared'],
 ['yet', 'he', 'seemed', 'not', 'so', 'much', 'sulky', 'as', 'abstracted'],
 ['as',
  'the',
  'evening',
  'wore',
  'away',
  'he',
  'became',
  'more',
  'and',
  'more',
  'absorbed',
  'in',
  'reverie',
  'from',
  'which',
  'no',
  'sallies',
  'of',
  'mine',
  'could',
  'arouse',
  'him'],
 ['it',
  'had',
  'been',
  'my',
  'intention',
  'to',
  'pass',
  'the',
  'night',
  'at',
  'the',
  'hut',
  'as',
  'i',
  'had',
  'frequently',
  'done',
  'before',
  'but',
  'seeing',
  'my',
  'host',
  'in',
  'this',
  'mood',
  'i',
  'deemed',
  'it',
  'proper',
  'to',
  'take',
  'leave'],
 ['he',
  'did',
  'not',
  'press',
  'me',
  'to',
  'remain',
  'but',
  'as',
  'i',
  'departed',
  'he',
  'shook',
  'my',
  'hand',
  'with',
  'even',
  'more',
  'than',
  'his',
  'usual',
  'cordiality'],
 ['it',
  'was',
  'about',
  'a',
  'month',
  'after',
  'this',
  'and',
  'during',
  'the',
  'interval',
  'i',
  'had',
  'seen',
  'nothing',
  'of',
  'when',
  'i',
  'received',
  'a',
  'visit',
  'at',
  'from',
  'his',
  'man'],
 ['i',
  'had',
  'never',
  'seen',
  'the',
  'good',
  'old',
  'negro',
  'look',
  'so',
  'dispirited',
  'and',
  'i',
  'feared',
  'that',
  'some',
  'serious',
  'disaster',
  'had',
  'befallen',
  'my',
  'friend'],
 ['said',
  'i',
  'what',
  'is',
  'the',
  'matter',
  'now',
  'how',
  'is',
  'your',
  'master'],
 ['why',
  'to',
  'speak',
  'de',
  'troof',
  'massa',
  'him',
  'not',
  'so',
  'berry',
  'well',
  'as',
  'mought',
  'be'],
 ['not', 'well'],
 ['i', 'am', 'truly', 'sorry', 'to', 'hear', 'it'],
 ['what', 'does', 'he', 'complain', 'of'],
 ['dat',
  's',
  'it',
  'him',
  'neber',
  'plain',
  'of',
  'notin',
  'but',
  'him',
  'berry',
  'sick',
  'for',
  'all',
  'dat'],
 ['sick', 'why', 'didn', 't', 'you', 'say', 'so', 'at', 'once'],
 ['is', 'he', 'confined', 'to', 'bed'],
 ['dat',
  'he',
  'aint',
  'he',
  'aint',
  'find',
  'nowhar',
  'dat',
  's',
  'just',
  'whar',
  'de',
  'shoe',
  'pinch',
  'my',
  'mind',
  'is',
  'got',
  'to',
  'be',
  'berry',
  'hebby',
  'bout',
  'poor'],
 ['i',
  'should',
  'like',
  'to',
  'understand',
  'what',
  'it',
  'is',
  'you',
  'are',
  'talking',
  'about'],
 ['you', 'say', 'your', 'master', 'is', 'sick'],
 ['t', 'he', 'told', 'you', 'what', 'ails', 'him'],
 ['why',
  'massa',
  'taint',
  'worf',
  'while',
  'for',
  'to',
  'git',
  'mad',
  'about',
  'de',
  'matter',
  'say',
  'noffin',
  'at',
  'all',
  'aint',
  'de',
  'matter',
  'wid',
  'him',
  'but',
  'den',
  'what',
  'make',
  'him',
  'go',
  'about',
  'looking',
  'dis',
  'here',
  'way',
  'wid',
  'he',
  'head',
  'down',
  'and',
  'he',
  'soldiers',
  'up',
  'and',
  'as',
  'white',
  'as',
  'a',
  'gose'],
 ['and', 'den', 'he', 'keep', 'a', 'syphon', 'all', 'de', 'time'],
 ['keeps', 'a', 'what'],
 ['a',
  'syphon',
  'wid',
  'de',
  'figgurs',
  'on',
  'de',
  'slate',
  'de',
  'queerest',
  'figgurs',
  'i',
  'ebber',
  'did',
  'see'],
 ['gittin', 'to', 'be', 'skeered', 'i', 'tell', 'you'],
 ['for', 'to', 'keep', 'mighty', 'tight', 'eye', 'pon', 'him', 'noovers'],
 ['todder',
  'day',
  'he',
  'gib',
  'me',
  'slip',
  'fore',
  'de',
  'sun',
  'up',
  'and',
  'was',
  'gone',
  'de',
  'whole',
  'ob',
  'de',
  'blessed',
  'day'],
 ['i',
  'had',
  'a',
  'big',
  'stick',
  'ready',
  'cut',
  'for',
  'to',
  'gib',
  'him',
  'deuced',
  'good',
  'beating',
  'when',
  'he',
  'did',
  'come',
  'but',
  'sich',
  'a',
  'fool',
  'dat',
  'i',
  'hadn',
  't',
  'de',
  'heart',
  'arter',
  'all',
  'he',
  'look',
  'so',
  'berry',
  'poorly'],
 ['what',
  'ah',
  'yes',
  'upon',
  'the',
  'whole',
  'i',
  'think',
  'you',
  'had',
  'better',
  'not',
  'be',
  'too',
  'severe',
  'with',
  'the',
  'poor',
  'fellow',
  'don',
  't',
  'flog',
  'him',
  'he',
  'can',
  't',
  'very',
  'well',
  'stand',
  'it',
  'but',
  'can',
  'you',
  'form',
  'no',
  'idea',
  'of',
  'what',
  'has',
  'occasioned',
  'this',
  'illness',
  'or',
  'rather',
  'this',
  'change',
  'of',
  'conduct'],
 ['anything', 'unpleasant', 'happened', 'since', 'i', 'saw', 'you'],
 ['massa',
  'dey',
  'aint',
  'bin',
  'noffin',
  'unpleasant',
  'since',
  'den',
  'twas',
  'fore',
  'den',
  'i',
  'm',
  'feared',
  'twas',
  'de',
  'berry',
  'day',
  'you',
  'was',
  'dare'],
 ['how'],
 ['what', 'do', 'you', 'mean'],
 ['why', 'massa', 'i', 'mean', 'de', 'bug', 'dare', 'now'],
 ['the', 'what'],
 ['bug',
  'i',
  'm',
  'berry',
  'sartain',
  'dat',
  'bin',
  'bit',
  'somewhere',
  'bout',
  'de',
  'head',
  'by',
  'dat',
  'goolebug'],
 ['and', 'what', 'cause', 'have', 'you', 'for', 'such', 'a', 'supposition'],
 ['enuff', 'massa', 'and', 'mouth', 'too'],
 ['i',
  'nebber',
  'did',
  'see',
  'sick',
  'a',
  'deuced',
  'bug',
  'he',
  'kick',
  'and',
  'he',
  'bite',
  'ebery',
  'ting',
  'what',
  'cum',
  'near',
  'him'],
 ['cotch',
  'him',
  'fuss',
  'but',
  'had',
  'for',
  'to',
  'let',
  'him',
  'go',
  'gin',
  'mighty',
  'quick',
  'i',
  'tell',
  'you',
  'den',
  'was',
  'de',
  'time',
  'he',
  'must',
  'ha',
  'got',
  'de',
  'bite'],
 ['i',
  'did',
  'n',
  't',
  'like',
  'de',
  'look',
  'oh',
  'de',
  'bug',
  'mouff',
  'myself',
  'no',
  'how',
  'so',
  'i',
  'would',
  'n',
  't',
  'take',
  'hold',
  'ob',
  'him',
  'wid',
  'my',
  'finger',
  'but',
  'i',
  'cotch',
  'him',
  'wid',
  'a',
  'piece',
  'ob',
  'paper',
  'dat',
  'i',
  'found'],
 ['i',
  'rap',
  'him',
  'up',
  'in',
  'de',
  'paper',
  'and',
  'stuff',
  'piece',
  'ob',
  'it',
  'in',
  'he',
  'mouff',
  'dat',
  'was',
  'de',
  'way'],
 ['and',
  'you',
  'think',
  'then',
  'that',
  'your',
  'master',
  'was',
  'really',
  'bitten',
  'by',
  'the',
  'beetle',
  'and',
  'that',
  'the',
  'bite',
  'made',
  'him',
  'sick'],
 ['i', 'do', 'n', 't', 'tink', 'noffin', 'about', 'it', 'i', 'nose', 'it'],
 ['what',
  'make',
  'him',
  'dream',
  'bout',
  'de',
  'goole',
  'so',
  'much',
  'if',
  'taint',
  'cause',
  'he',
  'bit',
  'by',
  'de',
  'goolebug'],
 ['heerd', 'bout', 'dem', 'goolebugs', 'fore', 'dis'],
 ['but', 'how', 'do', 'you', 'know', 'he', 'dreams', 'about', 'gold'],
 ['how', 'i', 'know'],
 ['why',
  'cause',
  'he',
  'talk',
  'about',
  'it',
  'in',
  'he',
  'sleep',
  'dat',
  's',
  'how',
  'i',
  'nose'],
 ['perhaps',
  'you',
  'are',
  'right',
  'but',
  'to',
  'what',
  'fortunate',
  'circumstance',
  'am',
  'i',
  'to',
  'attribute',
  'the',
  'honor',
  'of',
  'a',
  'visit',
  'from',
  'you',
  'today'],
 ['what', 'de', 'matter', 'massa'],
 ['you', 'bring', 'any', 'message', 'from'],
 ['massa',
  'i',
  'bring',
  'dis',
  'here',
  'pissel',
  'and',
  'here',
  'handed',
  'me',
  'a',
  'note',
  'which',
  'ran',
  'thus'],
 ['my'],
 ['why', 'have', 'i', 'not', 'seen', 'you', 'for', 'so', 'long', 'a', 'time'],
 ['i',
  'hope',
  'you',
  'have',
  'not',
  'been',
  'so',
  'foolish',
  'as',
  'to',
  'take',
  'offence',
  'at',
  'any',
  'little',
  'brusquerie',
  'of',
  'mine',
  'but',
  'no',
  'that',
  'is',
  'improbable'],
 ['since',
  'i',
  'saw',
  'you',
  'i',
  'have',
  'had',
  'great',
  'cause',
  'for',
  'anxiety'],
 ['i',
  'have',
  'something',
  'to',
  'tell',
  'you',
  'yet',
  'scarcely',
  'know',
  'how',
  'to',
  'tell',
  'it',
  'or',
  'whether',
  'i',
  'should',
  'tell',
  'it',
  'at',
  'all'],
 ['i',
  'have',
  'not',
  'been',
  'quite',
  'well',
  'for',
  'some',
  'days',
  'past',
  'and',
  'poor',
  'old',
  'annoys',
  'me',
  'almost',
  'beyond',
  'endurance',
  'by',
  'his',
  'wellmeant',
  'attentions',
  'would',
  'you',
  'believe',
  'it',
  'he',
  'had',
  'prepared',
  'a',
  'huge',
  'stick',
  'the',
  'other',
  'day',
  'with',
  'which',
  'to',
  'chastise',
  'me',
  'for',
  'giving',
  'him',
  'the',
  'slip',
  'and',
  'spending',
  'the',
  'day',
  'among',
  'the',
  'hills',
  'on',
  'the',
  'main',
  'land'],
 ['i',
  'verily',
  'believe',
  'that',
  'my',
  'ill',
  'looks',
  'alone',
  'saved',
  'me',
  'a',
  'flogging'],
 ['i',
  'have',
  'made',
  'no',
  'addition',
  'to',
  'my',
  'cabinet',
  'since',
  'we',
  'met'],
 ['if',
  'you',
  'can',
  'in',
  'any',
  'way',
  'make',
  'it',
  'convenient',
  'come',
  'over',
  'with'],
 ['do', 'come'],
 ['i',
  'wish',
  'to',
  'see',
  'you',
  'tonight',
  'upon',
  'business',
  'of',
  'importance'],
 ['i',
  'assure',
  'you',
  'that',
  'it',
  'is',
  'of',
  'the',
  'highest',
  'importance'],
 ['ever', 'yours'],
 ['there',
  'was',
  'something',
  'in',
  'the',
  'tone',
  'of',
  'this',
  'note',
  'which',
  'gave',
  'me',
  'great',
  'uneasiness'],
 ['its', 'whole', 'style', 'differed', 'materially', 'from', 'that', 'of'],
 ['what', 'could', 'he', 'be', 'dreaming', 'of'],
 ['what', 'new', 'crotchet', 'possessed', 'his', 'excitable', 'brain'],
 ['what',
  'business',
  'of',
  'the',
  'highest',
  'importance',
  'could',
  'he',
  'possibly',
  'have',
  'to',
  'transact'],
 ['s', 'account', 'of', 'him', 'boded', 'no', 'good'],
 ['i',
  'dreaded',
  'lest',
  'the',
  'continued',
  'pressure',
  'of',
  'misfortune',
  'had',
  'at',
  'length',
  'fairly',
  'unsettled',
  'the',
  'reason',
  'of',
  'my',
  'friend'],
 ['without',
  'a',
  'moment',
  's',
  'hesitation',
  'therefore',
  'i',
  'prepared',
  'to',
  'accompany',
  'the',
  'negro'],
 ['upon',
  'reaching',
  'the',
  'wharf',
  'i',
  'noticed',
  'a',
  'scythe',
  'and',
  'three',
  'spades',
  'all',
  'apparently',
  'new',
  'lying',
  'in',
  'the',
  'bottom',
  'of',
  'the',
  'boat',
  'in',
  'which',
  'we',
  'were',
  'to',
  'embark'],
 ['what', 'is', 'the', 'meaning', 'of', 'all', 'this', 'i', 'inquired'],
 ['syfe', 'massa', 'and', 'spade'],
 ['true', 'but', 'what', 'are', 'they', 'doing', 'here'],
 ['de',
  'syfe',
  'and',
  'de',
  'spade',
  'what',
  'will',
  'sis',
  'pon',
  'my',
  'buying',
  'for',
  'him',
  'in',
  'de',
  'town',
  'and',
  'de',
  'debbils',
  'own',
  'lot',
  'of',
  'money',
  'i',
  'had',
  'to',
  'gib',
  'for',
  'em'],
 ['but',
  'what',
  'in',
  'the',
  'name',
  'of',
  'all',
  'that',
  'is',
  'mysterious',
  'is',
  'your',
  'going',
  'to',
  'do',
  'with',
  'scythes',
  'and',
  'spades'],
 ['s',
  'more',
  'dan',
  'i',
  'know',
  'and',
  'debbil',
  'take',
  'me',
  'if',
  'i',
  'don',
  't',
  'blieve',
  'tis',
  'more',
  'dan',
  'he',
  'know',
  'too'],
 ['but', 'it', 's', 'all', 'cum', 'ob', 'do', 'bug'],
 ['finding',
  'that',
  'no',
  'satisfaction',
  'was',
  'to',
  'be',
  'obtained',
  'of',
  'whose',
  'whole',
  'intellect',
  'seemed',
  'to',
  'be',
  'absorbed',
  'by',
  'de',
  'bug',
  'i',
  'now',
  'stepped',
  'into',
  'the',
  'boat',
  'and',
  'made',
  'sail'],
 ['with',
  'a',
  'fair',
  'and',
  'strong',
  'breeze',
  'we',
  'soon',
  'ran',
  'into',
  'the',
  'little',
  'cove',
  'to',
  'the',
  'northward',
  'of',
  'and',
  'a',
  'walk',
  'of',
  'some',
  'two',
  'miles',
  'brought',
  'us',
  'to',
  'the',
  'hut'],
 ['it',
  'was',
  'about',
  'three',
  'in',
  'the',
  'afternoon',
  'when',
  'we',
  'arrived'],
 ['had', 'been', 'awaiting', 'us', 'in', 'eager', 'expectation'],
 ['he',
  'grasped',
  'my',
  'hand',
  'with',
  'a',
  'nervous',
  'empressement',
  'which',
  'alarmed',
  'me',
  'and',
  'strengthened',
  'the',
  'suspicions',
  'already',
  'entertained'],
 ['his',
  'countenance',
  'was',
  'pale',
  'even',
  'to',
  'ghastliness',
  'and',
  'his',
  'deepset',
  'eyes',
  'glared',
  'with',
  'unnatural',
  'lustre'],
 ['after',
  'some',
  'inquiries',
  'respecting',
  'his',
  'health',
  'i',
  'asked',
  'him',
  'not',
  'knowing',
  'what',
  'better',
  'to',
  'say',
  'if',
  'he',
  'had',
  'yet',
  'obtained',
  'the',
  'scarabæus',
  'from'],
 ['yes',
  'he',
  'replied',
  'coloring',
  'violently',
  'i',
  'got',
  'it',
  'from',
  'him',
  'the',
  'next',
  'morning'],
 ['nothing',
  'should',
  'tempt',
  'me',
  'to',
  'part',
  'with',
  'that',
  'scarabæus'],
 ['do', 'you', 'know', 'that', 'is', 'quite', 'right', 'about', 'it'],
 ['in',
  'what',
  'way',
  'i',
  'asked',
  'with',
  'a',
  'sad',
  'foreboding',
  'at',
  'heart'],
 ['in',
  'supposing',
  'it',
  'to',
  'be',
  'a',
  'bug',
  'of',
  'real',
  'gold',
  'he',
  'said',
  'this',
  'with',
  'an',
  'air',
  'of',
  'profound',
  'seriousness',
  'and',
  'i',
  'felt',
  'inexpressibly',
  'shocked'],
 ['this',
  'bug',
  'is',
  'to',
  'make',
  'my',
  'fortune',
  'he',
  'continued',
  'with',
  'a',
  'triumphant',
  'smile',
  'to',
  'reinstate',
  'me',
  'in',
  'my',
  'family',
  'possessions'],
 ['is', 'it', 'any', 'wonder', 'then', 'that', 'i', 'prize', 'it'],
 ['since',
  'has',
  'thought',
  'fit',
  'to',
  'bestow',
  'it',
  'upon',
  'me',
  'i',
  'have',
  'only',
  'to',
  'use',
  'it',
  'properly',
  'and',
  'i',
  'shall',
  'arrive',
  'at',
  'the',
  'gold',
  'of',
  'which',
  'it',
  'is',
  'the',
  'index'],
 ['bring', 'me', 'that', 'scarabæus'],
 ['what'],
 ['de', 'bug', 'massa'],
 ['i',
  'd',
  'rudder',
  'not',
  'go',
  'fer',
  'trubble',
  'dat',
  'bug',
  'you',
  'mus',
  'git',
  'him',
  'for',
  'your',
  'own',
  'self',
  'arose',
  'with',
  'a',
  'grave',
  'and',
  'stately',
  'air',
  'and',
  'brought',
  'me',
  'the',
  'beetle',
  'from',
  'a',
  'glass',
  'case',
  'in',
  'which',
  'it',
  'was',
  'enclosed'],
 ['it',
  'was',
  'a',
  'beautiful',
  'scarabæus',
  'and',
  'at',
  'that',
  'time',
  'unknown',
  'to',
  'naturalists',
  'of',
  'course',
  'a',
  'great',
  'prize',
  'in',
  'a',
  'scientific',
  'point',
  'of',
  'view'],
 ['there',
  'were',
  'two',
  'round',
  'black',
  'spots',
  'near',
  'one',
  'extremity',
  'of',
  'the',
  'back',
  'and',
  'a',
  'long',
  'one',
  'near',
  'the',
  'other'],
 ['the',
  'scales',
  'were',
  'exceedingly',
  'hard',
  'and',
  'glossy',
  'with',
  'all',
  'the',
  'appearance',
  'of',
  'burnished',
  'gold'],
 ['the',
  'weight',
  'of',
  'the',
  'insect',
  'was',
  'very',
  'remarkable',
  'and',
  'taking',
  'all',
  'things',
  'into',
  'consideration',
  'i',
  'could',
  'hardly',
  'blame',
  'for',
  'his',
  'opinion',
  'respecting',
  'it',
  'but',
  'what',
  'to',
  'make',
  'of',
  's',
  'concordance',
  'with',
  'that',
  'opinion',
  'i',
  'could',
  'not',
  'for',
  'the',
  'life',
  'of',
  'me',
  'tell'],
 ['i',
  'sent',
  'for',
  'you',
  'said',
  'he',
  'in',
  'a',
  'grandiloquent',
  'tone',
  'when',
  'i',
  'had',
  'completed',
  'my',
  'examination',
  'of',
  'the',
  'beetle',
  'i',
  'sent',
  'for',
  'you',
  'that',
  'i',
  'might',
  'have',
  'your',
  'counsel',
  'and',
  'assistance',
  'in',
  'furthering',
  'the',
  'views',
  'of',
  'and',
  'of',
  'the',
  'bug'],
 ['my',
  'dear',
  'i',
  'cried',
  'interrupting',
  'him',
  'you',
  'are',
  'certainly',
  'unwell',
  'and',
  'had',
  'better',
  'use',
  'some',
  'little',
  'precautions'],
 ['you',
  'shall',
  'go',
  'to',
  'bed',
  'and',
  'i',
  'will',
  'remain',
  'with',
  'you',
  'a',
  'few',
  'days',
  'until',
  'you',
  'get',
  'over',
  'this'],
 ['you', 'are', 'feverish', 'and'],
 ['my', 'pulse', 'said', 'he'],
 ['i',
  'felt',
  'it',
  'and',
  'to',
  'say',
  'the',
  'truth',
  'found',
  'not',
  'the',
  'slightest',
  'indication',
  'of',
  'fever'],
 ['but', 'you', 'may', 'be', 'ill', 'and', 'yet', 'have', 'no', 'fever'],
 ['allow', 'me', 'this', 'once', 'to', 'prescribe', 'for', 'you'],
 ['in', 'the', 'first', 'place', 'go', 'to', 'bed'],
 ['in', 'the', 'next'],
 ['you',
  'are',
  'mistaken',
  'he',
  'interposed',
  'i',
  'am',
  'as',
  'well',
  'as',
  'i',
  'can',
  'expect',
  'to',
  'be',
  'under',
  'the',
  'excitement',
  'which',
  'i',
  'suffer'],
 ['if',
  'you',
  'really',
  'wish',
  'me',
  'well',
  'you',
  'will',
  'relieve',
  'this',
  'excitement'],
 ['and', 'how', 'is', 'this', 'to', 'be', 'done'],
 ['easily'],
 ['and',
  'myself',
  'are',
  'going',
  'upon',
  'an',
  'expedition',
  'into',
  'the',
  'hills',
  'upon',
  'the',
  'main',
  'land',
  'and',
  'in',
  'this',
  'expedition',
  'we',
  'shall',
  'need',
  'the',
  'aid',
  'of',
  'some',
  'person',
  'in',
  'whom',
  'we',
  'can',
  'confide'],
 ['you', 'are', 'the', 'only', 'one', 'we', 'can', 'trust'],
 ['whether',
  'we',
  'succeed',
  'or',
  'fail',
  'the',
  'excitement',
  'which',
  'you',
  'now',
  'perceive',
  'in',
  'me',
  'will',
  'be',
  'equally',
  'allayed'],
 ['i',
  'am',
  'anxious',
  'to',
  'oblige',
  'you',
  'in',
  'any',
  'way',
  'i',
  'replied',
  'but',
  'do',
  'you',
  'mean',
  'to',
  'say',
  'that',
  'this',
  'infernal',
  'beetle',
  'has',
  'any',
  'connection',
  'with',
  'your',
  'expedition',
  'into',
  'the',
  'hills'],
 ['it', 'has'],
 ['then',
  'i',
  'can',
  'become',
  'a',
  'party',
  'to',
  'no',
  'such',
  'absurd',
  'proceeding'],
 ['i',
  'am',
  'sorry',
  'very',
  'sorry',
  'for',
  'we',
  'shall',
  'have',
  'to',
  'try',
  'it',
  'by',
  'ourselves'],
 ['try', 'it', 'by', 'yourselves'],
 ['the',
  'man',
  'is',
  'surely',
  'mad',
  'but',
  'stay',
  'how',
  'long',
  'do',
  'you',
  'propose',
  'to',
  'be',
  'absent'],
 ['probably', 'all', 'night'],
 ['we',
  'shall',
  'start',
  'immediately',
  'and',
  'be',
  'back',
  'at',
  'all',
  'events',
  'by',
  'sunrise'],
 ['and',
  'will',
  'you',
  'promise',
  'me',
  'upon',
  'your',
  'honor',
  'that',
  'when',
  'this',
  'freak',
  'of',
  'yours',
  'is',
  'over',
  'and',
  'the',
  'bug',
  'business',
  'good'],
 ['settled',
  'to',
  'your',
  'satisfaction',
  'you',
  'will',
  'then',
  'return',
  'home',
  'and',
  'follow',
  'my',
  'advice',
  'implicitly',
  'as',
  'that',
  'of',
  'your',
  'physician'],
 ['i',
  'promise',
  'and',
  'now',
  'let',
  'us',
  'be',
  'off',
  'for',
  'we',
  'have',
  'no',
  'time',
  'to',
  'lose'],
 ['with', 'a', 'heavy', 'heart', 'i', 'accompanied', 'my', 'friend'],
 ['we',
  'started',
  'about',
  'four',
  'o',
  'clock',
  'the',
  'dog',
  'and',
  'myself'],
 ['had',
  'with',
  'him',
  'the',
  'scythe',
  'and',
  'spades',
  'the',
  'whole',
  'of',
  'which',
  'he',
  'insisted',
  'upon',
  'carrying',
  'more',
  'through',
  'fear',
  'it',
  'seemed',
  'to',
  'me',
  'of',
  'trusting',
  'either',
  'of',
  'the',
  'implements',
  'within',
  'reach',
  'of',
  'his',
  'master',
  'than',
  'from',
  'any',
  'excess',
  'of',
  'industry',
  'or',
  'complaisance'],
 ['his',
  'demeanor',
  'was',
  'dogged',
  'in',
  'the',
  'extreme',
  'and',
  'dat',
  'deuced',
  'bug',
  'were',
  'the',
  'sole',
  'words',
  'which',
  'escaped',
  'his',
  'lips',
  'during',
  'the',
  'journey'],
 ['for',
  'my',
  'own',
  'part',
  'i',
  'had',
  'charge',
  'of',
  'a',
  'couple',
  'of',
  'dark',
  'lanterns',
  'while',
  'contented',
  'himself',
  'with',
  'the',
  'scarabæus',
  'which',
  'he',
  'carried',
  'attached',
  'to',
  'the',
  'end',
  'of',
  'a',
  'bit',
  'of',
  'whipcord',
  'twirling',
  'it',
  'to',
  'and',
  'fro',
  'with',
  'the',
  'air',
  'of',
  'a',
  'conjuror',
  'as',
  'he',
  'went'],
 ['when',
  'i',
  'observed',
  'this',
  'last',
  'plain',
  'evidence',
  'of',
  'my',
  'friend',
  's',
  'aberration',
  'of',
  'mind',
  'i',
  'could',
  'scarcely',
  'refrain',
  'from',
  'tears'],
 ['i',
  'thought',
  'it',
  'best',
  'however',
  'to',
  'humor',
  'his',
  'fancy',
  'at',
  'least',
  'for',
  'the',
  'present',
  'or',
  'until',
  'i',
  'could',
  'adopt',
  'some',
  'more',
  'energetic',
  'measures',
  'with',
  'a',
  'chance',
  'of',
  'success'],
 ['in',
  'the',
  'mean',
  'time',
  'i',
  'endeavored',
  'but',
  'all',
  'in',
  'vain',
  'to',
  'sound',
  'him',
  'in',
  'regard',
  'to',
  'the',
  'object',
  'of',
  'the',
  'expedition'],
 ['having',
  'succeeded',
  'in',
  'inducing',
  'me',
  'to',
  'accompany',
  'him',
  'he',
  'seemed',
  'unwilling',
  'to',
  'hold',
  'conversation',
  'upon',
  'any',
  'topic',
  'of',
  'minor',
  'importance',
  'and',
  'to',
  'all',
  'my',
  'questions',
  'vouchsafed',
  'no',
  'other',
  'reply',
  'than',
  'we',
  'shall',
  'see'],
 ['we',
  'crossed',
  'the',
  'creek',
  'at',
  'the',
  'head',
  'of',
  'the',
  'island',
  'by',
  'means',
  'of',
  'a',
  'skiff',
  'and',
  'ascending',
  'the',
  'high',
  'grounds',
  'on',
  'the',
  'shore',
  'of',
  'the',
  'main',
  'land',
  'proceeded',
  'in',
  'a',
  'northwesterly',
  'direction',
  'through',
  'a',
  'tract',
  'of',
  'country',
  'excessively',
  'wild',
  'and',
  'desolate',
  'where',
  'no',
  'trace',
  'of',
  'a',
  'human',
  'footstep',
  'was',
  'to',
  'be',
  'seen'],
 ['led',
  'the',
  'way',
  'with',
  'decision',
  'pausing',
  'only',
  'for',
  'an',
  'instant',
  'here',
  'and',
  'there',
  'to',
  'consult',
  'what',
  'appeared',
  'to',
  'be',
  'certain',
  'landmarks',
  'of',
  'his',
  'own',
  'contrivance',
  'upon',
  'a',
  'former',
  'occasion'],
 ['in',
  'this',
  'manner',
  'we',
  'journeyed',
  'for',
  'about',
  'two',
  'hours',
  'and',
  'the',
  'sun',
  'was',
  'just',
  'setting',
  'when',
  'we',
  'entered',
  'a',
  'region',
  'infinitely',
  'more',
  'dreary',
  'than',
  'any',
  'yet',
  'seen'],
 ['it',
  'was',
  'a',
  'species',
  'of',
  'table',
  'land',
  'near',
  'the',
  'summit',
  'of',
  'an',
  'almost',
  'inaccessible',
  'hill',
  'densely',
  'wooded',
  'from',
  'base',
  'to',
  'pinnacle',
  'and',
  'interspersed',
  'with',
  'huge',
  'crags',
  'that',
  'appeared',
  'to',
  'lie',
  'loosely',
  'upon',
  'the',
  'soil',
  'and',
  'in',
  'many',
  'cases',
  'were',
  'prevented',
  'from',
  'precipitating',
  'themselves',
  'into',
  'the',
  'valleys',
  'below',
  'merely',
  'by',
  'the',
  'support',
  'of',
  'the',
  'trees',
  'against',
  'which',
  'they',
  'reclined'],
 ['deep',
  'ravines',
  'in',
  'various',
  'directions',
  'gave',
  'an',
  'air',
  'of',
  'still',
  'sterner',
  'solemnity',
  'to',
  'the',
  'scene'],
 ['the',
  'natural',
  'platform',
  'to',
  'which',
  'we',
  'had',
  'clambered',
  'was',
  'thickly',
  'overgrown',
  'with',
  'brambles',
  'through',
  'which',
  'we',
  'soon',
  'discovered',
  'that',
  'it',
  'would',
  'have',
  'been',
  'impossible',
  'to',
  'force',
  'our',
  'way',
  'but',
  'for',
  'the',
  'scythe',
  'and',
  'by',
  'direction',
  'of',
  'his',
  'master',
  'proceeded',
  'to',
  'clear',
  'for',
  'us',
  'a',
  'path',
  'to',
  'the',
  'foot',
  'of',
  'an',
  'enormously',
  'tall',
  'tuliptree',
  'which',
  'stood',
  'with',
  'some',
  'eight',
  'or',
  'ten',
  'oaks',
  'upon',
  'the',
  'level',
  'and',
  'far',
  'surpassed',
  'them',
  'all',
  'and',
  'all',
  'other',
  'trees',
  'which',
  'i',
  'had',
  'then',
  'ever',
  'seen',
  'in',
  'the',
  'beauty',
  'of',
  'its',
  'foliage',
  'and',
  'form',
  'in',
  'the',
  'wide',
  'spread',
  'of',
  'its',
  'branches',
  'and',
  'in',
  'the',
  'general',
  'majesty',
  'of',
  'its',
  'appearance'],
 ['when',
  'we',
  'reached',
  'this',
  'tree',
  'turned',
  'to',
  'and',
  'asked',
  'him',
  'if',
  'he',
  'thought',
  'he',
  'could',
  'climb',
  'it'],
 ['the',
  'old',
  'man',
  'seemed',
  'a',
  'little',
  'staggered',
  'by',
  'the',
  'question',
  'and',
  'for',
  'some',
  'moments',
  'made',
  'no',
  'reply'],
 ['at',
  'length',
  'he',
  'approached',
  'the',
  'huge',
  'trunk',
  'walked',
  'slowly',
  'around',
  'it',
  'and',
  'examined',
  'it',
  'with',
  'minute',
  'attention'],
 ['when', 'he', 'had', 'completed', 'his', 'scrutiny', 'he', 'merely', 'said'],
 ['massa', 'climb', 'any', 'tree', 'he', 'ebber', 'see', 'in', 'he', 'life'],
 ['then',
  'up',
  'with',
  'you',
  'as',
  'soon',
  'as',
  'possible',
  'for',
  'it',
  'will',
  'soon',
  'be',
  'too',
  'dark',
  'to',
  'see',
  'what',
  'we',
  'are',
  'about'],
 ['how', 'far', 'mus', 'go', 'up', 'massa', 'inquired'],
 ['up',
  'the',
  'main',
  'trunk',
  'first',
  'and',
  'then',
  'i',
  'will',
  'tell',
  'you',
  'which',
  'way',
  'to',
  'go',
  'and',
  'here',
  'stop'],
 ['take', 'this', 'beetle', 'with', 'you'],
 ['bug',
  'de',
  'goole',
  'bug',
  'cried',
  'the',
  'negro',
  'drawing',
  'back',
  'in',
  'dismay',
  'what',
  'for',
  'mus',
  'tote',
  'de',
  'bug',
  'way',
  'up',
  'de',
  'tree',
  'dn',
  'if',
  'i',
  'do'],
 ['if',
  'you',
  'are',
  'afraid',
  'a',
  'great',
  'big',
  'negro',
  'like',
  'you',
  'to',
  'take',
  'hold',
  'of',
  'a',
  'harmless',
  'little',
  'dead',
  'beetle',
  'why',
  'you',
  'can',
  'carry',
  'it',
  'up',
  'by',
  'this',
  'string',
  'but',
  'if',
  'you',
  'do',
  'not',
  'take',
  'it',
  'up',
  'with',
  'you',
  'in',
  'some',
  'way',
  'i',
  'shall',
  'be',
  'under',
  'the',
  'necessity',
  'of',
  'breaking',
  'your',
  'head',
  'with',
  'this',
  'shovel'],
 ['what',
  'de',
  'matter',
  'now',
  'massa',
  'said',
  'evidently',
  'shamed',
  'into',
  'compliance',
  'always',
  'want',
  'for',
  'to',
  'raise',
  'fuss',
  'wid',
  'old',
  'nigger'],
 ['only', 'funnin', 'any', 'how'],
 ['feered', 'de', 'bug'],
 ['what',
  'i',
  'keer',
  'for',
  'de',
  'bug',
  'here',
  'he',
  'took',
  'cautiously',
  'hold',
  'of',
  'the',
  'extreme',
  'end',
  'of',
  'the',
  'string',
  'and',
  'maintaining',
  'the',
  'insect',
  'as',
  'far',
  'from',
  'his',
  'person',
  'as',
  'circumstances',
  'would',
  'permit',
  'prepared',
  'to',
  'ascend',
  'the',
  'tree'],
 ['in',
  'youth',
  'the',
  'tuliptree',
  'or',
  'the',
  'most',
  'magnificent',
  'of',
  'american',
  'foresters',
  'has',
  'a',
  'trunk',
  'peculiarly',
  'smooth',
  'and',
  'often',
  'rises',
  'to',
  'a',
  'great',
  'height',
  'without',
  'lateral',
  'branches',
  'but',
  'in',
  'its',
  'riper',
  'age',
  'the',
  'bark',
  'becomes',
  'gnarled',
  'and',
  'uneven',
  'while',
  'many',
  'short',
  'limbs',
  'make',
  'their',
  'appearance',
  'on',
  'the',
  'stem'],
 ['thus',
  'the',
  'difficulty',
  'of',
  'ascension',
  'in',
  'the',
  'present',
  'case',
  'lay',
  'more',
  'in',
  'semblance',
  'than',
  'in',
  'reality'],
 ['embracing',
  'the',
  'huge',
  'cylinder',
  'as',
  'closely',
  'as',
  'possible',
  'with',
  'his',
  'arms',
  'and',
  'knees',
  'seizing',
  'with',
  'his',
  'hands',
  'some',
  'projections',
  'and',
  'resting',
  'his',
  'naked',
  'toes',
  'upon',
  'others',
  'after',
  'one',
  'or',
  'two',
  'narrow',
  'escapes',
  'from',
  'falling',
  'at',
  'length',
  'wriggled',
  'himself',
  'into',
  'the',
  'first',
  'great',
  'fork',
  'and',
  'seemed',
  'to',
  'consider',
  'the',
  'whole',
  'business',
  'as',
  'virtually',
  'accomplished'],
 ['the',
  'risk',
  'of',
  'the',
  'achievement',
  'was',
  'in',
  'fact',
  'now',
  'over',
  'although',
  'the',
  'climber',
  'was',
  'some',
  'sixty',
  'or',
  'seventy',
  'feet',
  'from',
  'the',
  'ground'],
 ['way', 'mus', 'go', 'now', 'he', 'asked'],
 ['up',
  'the',
  'largest',
  'branch',
  'the',
  'one',
  'on',
  'this',
  'side',
  'said'],
 ['the',
  'negro',
  'obeyed',
  'him',
  'promptly',
  'and',
  'apparently',
  'with',
  'but',
  'little',
  'trouble',
  'ascending',
  'higher',
  'and',
  'higher',
  'until',
  'no',
  'glimpse',
  'of',
  'his',
  'squat',
  'figure',
  'could',
  'be',
  'obtained',
  'through',
  'the',
  'dense',
  'foliage',
  'which',
  'enveloped',
  'it'],
 ['presently',
  'his',
  'voice',
  'was',
  'heard',
  'in',
  'a',
  'sort',
  'of',
  'halloo'],
 ['how', 'much', 'fudder', 'is', 'got', 'for', 'go'],
 ['how', 'high', 'up', 'are', 'you', 'asked'],
 ['so',
  'fur',
  'replied',
  'the',
  'negro',
  'can',
  'see',
  'de',
  'sky',
  'fru',
  'de',
  'top',
  'ob',
  'de',
  'tree'],
 ['mind', 'the', 'sky', 'but', 'attend', 'to', 'what', 'i', 'say'],
 ['look',
  'down',
  'the',
  'trunk',
  'and',
  'count',
  'the',
  'limbs',
  'below',
  'you',
  'on',
  'this',
  'side'],
 ['how', 'many', 'limbs', 'have', 'you', 'passed'],
 ['one',
  'two',
  'tree',
  'four',
  'fibe',
  'i',
  'done',
  'pass',
  'fibe',
  'big',
  'limb',
  'massa',
  'pon',
  'dis',
  'side'],
 ['then', 'go', 'one', 'limb', 'higher'],
 ['in',
  'a',
  'few',
  'minutes',
  'the',
  'voice',
  'was',
  'heard',
  'again',
  'announcing',
  'that',
  'the',
  'seventh',
  'limb',
  'was',
  'attained'],
 ['now',
  'cried',
  'evidently',
  'much',
  'excited',
  'i',
  'want',
  'you',
  'to',
  'work',
  'your',
  'way',
  'out',
  'upon',
  'that',
  'limb',
  'as',
  'far',
  'as',
  'you',
  'can'],
 ['if',
  'you',
  'see',
  'anything',
  'strange',
  'let',
  'me',
  'know',
  'by',
  'this',
  'time',
  'what',
  'little',
  'doubt',
  'i',
  'might',
  'have',
  'entertained',
  'of',
  'my',
  'poor',
  'friend',
  's',
  'insanity',
  'was',
  'put',
  'finally',
  'at',
  'rest'],
 ['i',
  'had',
  'no',
  'alternative',
  'but',
  'to',
  'conclude',
  'him',
  'stricken',
  'with',
  'lunacy',
  'and',
  'i',
  'became',
  'seriously',
  'anxious',
  'about',
  'getting',
  'him',
  'home'],
 ['while',
  'i',
  'was',
  'pondering',
  'upon',
  'what',
  'was',
  'best',
  'to',
  'be',
  'done',
  's',
  'voice',
  'was',
  'again',
  'heard'],
 ['feerd',
  'for',
  'to',
  'ventur',
  'pon',
  'dis',
  'limb',
  'berry',
  'far',
  'tis',
  'dead',
  'limb',
  'putty',
  'much',
  'all',
  'de',
  'way'],
 ['you',
  'say',
  'it',
  'was',
  'a',
  'dead',
  'limb',
  'cried',
  'in',
  'a',
  'quavering',
  'voice'],
 ['massa',
  'him',
  'dead',
  'as',
  'de',
  'doornail',
  'done',
  'up',
  'for',
  'sartain',
  'done',
  'departed',
  'dis',
  'here',
  'life'],
 ['what',
  'in',
  'the',
  'name',
  'heaven',
  'shall',
  'i',
  'do',
  'asked',
  'seemingly',
  'in',
  'the',
  'greatest',
  'distress'],
 ['do',
  'said',
  'i',
  'glad',
  'of',
  'an',
  'opportunity',
  'to',
  'interpose',
  'a',
  'word',
  'why',
  'come',
  'home',
  'and',
  'go',
  'to',
  'bed'],
 ['now', 'that', 's', 'a', 'fine', 'fellow'],
 ['it',
  's',
  'getting',
  'late',
  'and',
  'besides',
  'you',
  'remember',
  'your',
  'promise'],
 ['cried',
  'he',
  'without',
  'heeding',
  'me',
  'in',
  'the',
  'least',
  'do',
  'you',
  'hear',
  'me'],
 ['hear', 'you', 'ebber', 'so', 'plain'],
 ['the',
  'wood',
  'well',
  'then',
  'with',
  'your',
  'knife',
  'and',
  'see',
  'if',
  'you',
  'think',
  'it',
  'very',
  'rotten'],
 ['rotten',
  'massa',
  'sure',
  'nuff',
  'replied',
  'the',
  'negro',
  'in',
  'a',
  'few',
  'moments',
  'but',
  'not',
  'so',
  'berry',
  'rotten',
  'as',
  'mought',
  'be'],
 ['ventur',
  'out',
  'leetle',
  'way',
  'pon',
  'de',
  'limb',
  'by',
  'myself',
  'dat',
  's',
  'true'],
 ['by', 'yourself', 'what', 'do', 'you', 'mean'],
 ['why', 'i', 'mean', 'de', 'bug'],
 ['berry', 'hebby', 'bug'],
 ['i',
  'drop',
  'him',
  'down',
  'fuss',
  'and',
  'den',
  'de',
  'limb',
  'won',
  't',
  'break',
  'wid',
  'just',
  'de',
  'weight',
  'ob',
  'one',
  'nigger'],
 ['you',
  'infernal',
  'scoundrel',
  'cried',
  'apparently',
  'much',
  'relieved',
  'what',
  'do',
  'you',
  'mean',
  'by',
  'telling',
  'me',
  'such',
  'nonsense',
  'as',
  'that'],
 ['as',
  'sure',
  'as',
  'you',
  'drop',
  'that',
  'beetle',
  'i',
  'll',
  'break',
  'your',
  'neck'],
 ['look', 'here', 'do', 'you', 'hear', 'me'],
 ['massa', 'needn', 't', 'hollo', 'at', 'poor', 'nigger', 'dat', 'style'],
 ['well'],
 ['now',
  'listen',
  'if',
  'you',
  'will',
  'venture',
  'out',
  'on',
  'the',
  'limb',
  'as',
  'far',
  'as',
  'you',
  'think',
  'safe',
  'and',
  'not',
  'let',
  'go',
  'the',
  'beetle',
  'i',
  'll',
  'make',
  'you',
  'a',
  'present',
  'of',
  'a',
  'silver',
  'dollar',
  'as',
  'soon',
  'as',
  'you',
  'get',
  'down'],
 ['i',
  'm',
  'gwine',
  'deed',
  'i',
  'is',
  'replied',
  'the',
  'negro',
  'very',
  'promptly',
  'mos',
  'out',
  'to',
  'the',
  'eend',
  'now'],
 ['to',
  'the',
  'end',
  'here',
  'fairly',
  'screamed',
  'do',
  'you',
  'say',
  'you',
  'are',
  'out',
  'to',
  'the',
  'end',
  'of',
  'that',
  'limb'],
 ['soon', 'be', 'to', 'de', 'eend', 'massa', 'oooooh'],
 ['lorgolamarcy'],
 ['what', 'is', 'dis', 'here', 'pon', 'de', 'tree'],
 ['well', 'cried', 'highly', 'delighted', 'what', 'is', 'it'],
 ['why',
  'taint',
  'noffin',
  'but',
  'a',
  'skull',
  'somebody',
  'bin',
  'lef',
  'him',
  'head',
  'up',
  'de',
  'tree',
  'and',
  'de',
  'crows',
  'done',
  'gobble',
  'ebery',
  'bit',
  'ob',
  'de',
  'meat',
  'off'],
 ...]

Generate word embeddings with Gensim's library

In [268]:
model = word2vec.Word2Vec(corpus, size=246, window=window, min_count=200, workers=4)

Visualize with tSNE

Generate coordinates to plot

In [269]:
coords = pd.DataFrame(index=range(len(model.wv.vocab)))
coords['label'] = [w for w in model.wv.vocab]
coords['vector'] = coords['label'].apply(lambda x: model.wv.get_vector(x))
In [270]:
coords.head()
Out[270]:
label vector
0 the [0.22036165, 0.26431423, -0.2840596, 0.0789844...
1 late [-0.091123305, -0.119603, -0.14193687, -0.0994...
2 from [-0.38256493, -0.25111318, 0.5238992, 0.039146...
3 that [-0.15131615, 0.36850616, -0.21218912, 0.04042...
4 to [-0.3417644, 0.112480424, 0.8044965, -0.105765...

Use ScikitLearn's TSNE library

In [271]:
tsne_model = TSNE(perplexity=40, n_components=2, init='pca', n_iter=2500, random_state=23)
tsne_values = tsne_model.fit_transform(coords['vector'].tolist())
In [272]:
coords['x'] = tsne_values[:,0]
coords['y'] = tsne_values[:,1]
In [273]:
coords.head()
Out[273]:
label vector x y
0 the [0.22036165, 0.26431423, -0.2840596, 0.0789844... 9.654496 3.837436
1 late [-0.091123305, -0.119603, -0.14193687, -0.0994... -0.693029 5.529316
2 from [-0.38256493, -0.25111318, 0.5238992, 0.039146... 12.801660 2.025463
3 that [-0.15131615, 0.36850616, -0.21218912, 0.04042... -3.497712 2.166096
4 to [-0.3417644, 0.112480424, 0.8044965, -0.105765... -7.868403 -0.119167

Plot the coordinates

In [274]:
px.scatter(coords, 'x', 'y', text='label', height=1000).update_traces(mode='text')

Semantic Algebra

Analogy

$A : B :: C : D? \rightarrow B - A + C = D$

In [275]:
def complete_analogy(A, B, C, n=2):
    try:
        return model.wv.most_similar(positive=[B, C], negative=[A])[0:n]
    except KeyError as e:
        print('Error:', e)
        return None
In [276]:
complete_analogy('i', 'we', 'you')
Out[276]:
[('us', 0.7015376687049866), ('our', 0.558246910572052)]
In [277]:
complete_analogy('man', 'woman', 'he')
Out[277]:
[('she', 0.7519389390945435), ('feeling', 0.4981950521469116)]
In [278]:
complete_analogy('man','boy','woman')
Out[278]:
[('girl', 0.8022274374961853), ('child', 0.7688707113265991)]

Sentiment Analysis

In [279]:
# set up 
#data_dir = 'data/'

novels_csv = 'TOKEN2.csv'
vocab_csv = 'VOCAB2.csv'
lib_csv = 'LIB3.csv'
bow_csv = 'BOW.csv'

# For TOKENS
OHCO = ['book_id', 'chap_num', 'para_num', 'sent_num', 'token_num']
BOOKS = OHCO[:1]
CHAPS = OHCO[:2]
PARAS = OHCO[:3]
SENTS = OHCO[:4]

salex_csv = 'salex_nrc.csv'
emo_cols = "anger anticipation disgust fear joy sadness surprise trust polarity".split()
In [280]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly_express as px
from IPython.core.display import display, HTML
In [281]:
sns.set()
%matplotlib inline

Acquire Data

In [282]:
TOKENS = pd.read_csv(novels_csv).set_index(OHCO)
VOCAB = pd.read_csv(vocab_csv).set_index('term_id')
LIB = pd.read_csv(lib_csv).set_index('book_id')
BOW = pd.read_csv(bow_csv)

# Add token_str to BOW
BOW['term_str'] = BOW.term_id.map(VOCAB.term_str)
BOW = BOW.set_index(['book_id','chap_num','term_id'])
In [283]:
BOW.sort_index()
Out[283]:
n c term_str
book_id chap_num term_id
1 1 5 1 1 005484
10 3 1 1
13 1 1 1000
16 1 1 10600
17 1 1 1080
... ... ... ... ... ...
3795 24 29449 2 1 yellow
29473 104 1 you
29474 4 1 young
29475 2 1 younger
29478 15 1 your

304741 rows × 3 columns

In [284]:
LIB
Out[284]:
book_id.1 book_title book_file author title
book_id
2787 An Old - fashioned Girl, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_AN_OLD-FASHIONED_GIRL... 2787 Alcott An Old - fashioned Girl
2726 Eight Cousins, by Louisa M. Alcott epubs_\ALCOTT_MAY_LOUISA_EIGHT_COUSINS-pg2726.txt 2726 Alcott Eight Cousins
2786 Jack and Jill, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_JACK_AND_JILL-pg2786.txt 2786 Alcott Jack and Jill
3499 Jo's Boys, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_JO’S_BOYS-pg3499.txt 3499 Alcott Jo's Boys
2788 Little Men, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_LITTLE_MEN-pg2788.txt 2788 Alcott Little Men
514 Little Women, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_LITTLE_WOMEN-pg514.txt 514 Alcott Little Women
2804 Rose in Bloom, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_ROSE_IN_BLOOM-pg2804.txt 2804 Alcott Rose in Bloom
3795 Under the Lilacs, by Louisa May Alcott epubs_\ALCOTT_MAY_LOUISA_UNDER_THE_LILACS-pg37... 3795 Alcott Under the Lilacs
1 The Works of Edgar Allan Poe Volume 1 (of 5) o... epubs/2147-0.txt 1 Poe The Works of Edgar Allan Poe Volume 1 (of 5) o...
2 The Works of Edgar Allan Poe Volume 2 (of 5) o... epubs/2148-0.txt 2 Poe The Works of Edgar Allan Poe Volume 2 (of 5) o...
3 The Works of Edgar Allan Poe Volume 3 (of 5) o... epubs/2149-0.txt 3 Poe The Works of Edgar Allan Poe Volume 3 (of 5) o...
4 The Works of Edgar Allan Poe Volume 4 (of 5) o... epubs/2150-0.txt 4 Poe The Works of Edgar Allan Poe Volume 4 (of 5) o...
5 The Works of Edgar Allan Poe Volume 5 (of 5) o... epubs/2151-0.txt 5 Poe The Works of Edgar Allan Poe Volume 5 (of 5) o...
In [285]:
# book name and id 
old = 2787
eight = 2726
jackjill = 2786
jo = 3499
littleM = 2788
littleW = 514
rose = 2804
under = 3795
poe1 = 1
poe2 = 2
poe3 = 3
poe4 = 4
poe5 = 5

Get SA Lexicon

In [286]:
salex_csv = 'salex_nrc.csv'
In [287]:
SALEX = pd.read_csv(salex_csv).set_index('term_str')
SALEX.columns = [col.replace('nrc_','') for col in SALEX.columns]
SALEX['polarity'] = SALEX.positive - SALEX.negative
In [288]:
SALEX
Out[288]:
anger anticipation disgust fear joy negative positive sadness surprise trust polarity
term_str
abandon 0 0 0 1 0 1 0 1 0 0 -1
abandoned 1 0 0 1 0 1 0 1 0 0 -1
abandonment 1 0 0 1 0 1 0 1 1 0 -1
abduction 0 0 0 1 0 1 0 1 1 0 -1
aberration 0 0 1 0 0 1 0 0 0 0 -1
... ... ... ... ... ... ... ... ... ... ... ...
young 0 1 0 0 1 0 1 0 1 0 1
youth 1 1 0 1 1 0 1 0 1 0 1
zeal 0 1 0 0 1 0 1 0 1 1 1
zealous 0 0 0 0 1 0 1 0 0 1 1
zest 0 1 0 0 1 0 1 0 0 1 1

3688 rows × 11 columns

Compute Sentiment

Combine SALEX with VOCAB

In [289]:
VOCAB.head()
Out[289]:
term_rank term_str n num stop stem_porter pos_max term_rank2 p zipf_k zipf_k2 zipf_k3 p2 h df idf tfidf_sum h2 x_factor2
term_id
26151 1 the 58042 0 1 the DT 1 1.963798 58042 58042 1.963798 0.052274 0.222570 247 0.001755 0.022318 7.652003 0.000000
1110 2 and 41247 0 1 and CC 2 1.395554 82494 82494 2.791108 0.037148 0.176474 247 0.001755 0.016572 7.820398 5.420687
17817 3 of 29442 0 1 of IN 3 0.996143 88326 88326 2.988429 0.026516 0.138865 247 0.001755 0.011051 7.532582 8.275387
26510 4 to 28593 0 1 to TO 4 0.967418 114372 114372 3.869671 0.025752 0.135947 247 0.001755 0.011093 7.777130 10.781392
219 5 a 26310 0 1 a DT 5 0.890175 131550 131550 4.450873 0.023695 0.127937 247 0.001755 0.010247 7.782000 12.524645
In [290]:
len(VOCAB)
Out[290]:
29556
In [291]:
V = pd.concat([VOCAB.reset_index().set_index('term_str'), SALEX], join='inner', axis=1)\
    .reset_index().set_index('term_id')
In [292]:
V.head()
Out[292]:
term_str term_rank n num stop stem_porter pos_max term_rank2 p zipf_k ... anticipation disgust fear joy negative positive sadness surprise trust polarity
term_id
11482 good 69 2193 0 0 good JJ 68 0.074198 151317 ... 1 0 0 1 0 1 0 1 1 1
10624 found 142 975 0 0 found VBD 139 0.032988 138450 ... 0 0 0 1 0 1 0 0 1 1
16868 mother 144 963 0 0 mother NN 141 0.032582 138672 ... 1 0 0 1 1 1 1 0 1 0
29474 young 151 917 0 0 young JJ 147 0.031026 138467 ... 1 0 0 1 0 1 0 1 0 1
3230 boy 167 828 0 0 boy NN 162 0.028015 138276 ... 0 1 0 0 1 0 0 0 0 -1

5 rows × 30 columns

In [293]:
len(V)
Out[293]:
2678
In [294]:
V.to_csv('Vocab_Sentiment.csv')

Combine VOCAB with BOW

In [295]:
BOWIDX = BOW.index
B = BOW.merge(V, on='term_str', how='left')
B.index = BOWIDX
B = B.dropna()

# didnt have tf, tfidf columns
bcols = ['n_x', 'term_str', 'term_rank', 'n_y', 'num', 'pos_max', 'term_rank2', 'p', 
'tfidf_sum', 'anger', 'anticipation', 'disgust', 'fear', 'joy', 'negative', 'positive', 'sadness', 'surprise', 'trust', 'polarity']

B = B[bcols]

for col in emo_cols:
    B[col] = B[col] * B.tfidf_sum # used to be B.tfidf
In [296]:
B.head()
Out[296]:
n_x term_str term_rank n_y num pos_max term_rank2 p tfidf_sum anger anticipation disgust fear joy negative positive sadness surprise trust polarity
book_id chap_num term_id
1 1 224 2 abandoned 4054.0 21.0 0.0 VBN 585.0 0.000711 0.005090 0.00509 0.0 0.0 0.005090 0.000000 1.0 0.0 0.005090 0.000000 0.0 -0.005090
295 2 absence 1545.0 65.0 0.0 NN 541.0 0.002199 0.008545 0.00000 0.0 0.0 0.008545 0.000000 1.0 0.0 0.008545 0.000000 0.0 -0.008545
336 1 abyss 4196.0 20.0 0.0 NN 586.0 0.000677 0.006253 0.00000 0.0 0.0 0.006253 0.000000 1.0 0.0 0.006253 0.000000 0.0 -0.006253
363 3 accident 1288.0 81.0 0.0 NN 525.0 0.002741 0.010454 0.00000 0.0 0.0 0.010454 0.000000 1.0 0.0 0.010454 0.010454 0.0 -0.010454
382 2 accomplish 5340.0 14.0 0.0 VB 592.0 0.000474 0.002949 0.00000 0.0 0.0 0.000000 0.002949 0.0 1.0 0.000000 0.000000 0.0 0.002949
In [297]:
B.tfidf_sum
Out[297]:
book_id  chap_num  term_id
1        1         224        0.005090
                   295        0.008545
                   336        0.006253
                   363        0.010454
                   382        0.002949
                                ...   
3795     24        28947      0.017297
                   29240      0.014370
                   29244      0.011817
                   29283      0.013311
                   29474      0.019783
Name: tfidf_sum, Length: 43398, dtype: float64

Explore Emotion Words and POS

In [298]:
B.pos_max.value_counts().sort_values().plot.barh()
Out[298]:
<matplotlib.axes._subplots.AxesSubplot at 0x2470b936c18>
In [299]:
EMO_BOOKS = B.groupby(['book_id'])[emo_cols].mean()
EMO_CHAPS = B.groupby(['book_id','chap_num'])[emo_cols].mean()
In [300]:
EMO_BOOKS.index = LIB.book_title
In [301]:
EMO_BOOKS.plot.bar(figsize=(25,10))
Out[301]:
<matplotlib.axes._subplots.AxesSubplot at 0x2470b9064e0>

Compare Texts

In [302]:
EIGHT = EMO_CHAPS.loc[eight].copy()
JACKJILL = EMO_CHAPS.loc[jackjill].copy()
OLD = EMO_CHAPS.loc[old].copy()
JO = EMO_CHAPS.loc[jo].copy()
LITTLEM = EMO_CHAPS.loc[littleM].copy()
LITTLEW = EMO_CHAPS.loc[littleW].copy()
ROSE = EMO_CHAPS.loc[rose].copy()
UNDER = EMO_CHAPS.loc[under].copy()
POE1 = EMO_CHAPS.loc[poe1].copy()
POE2 = EMO_CHAPS.loc[poe2].copy()
POE3 = EMO_CHAPS.loc[poe3].copy()
POE4 = EMO_CHAPS.loc[poe4].copy()
POE5 = EMO_CHAPS.loc[poe5].copy()
In [303]:
EIGHT.mean().sort_values().plot.barh();
In [304]:
JACKJILL.mean().sort_values().plot.barh();
In [305]:
OLD.mean().sort_values().plot.barh();
In [306]:
JO.mean().sort_values().plot.barh();
In [307]:
LITTLEM.mean().sort_values().plot.barh();
In [308]:
LITTLEW.mean().sort_values().plot.barh();
In [309]:
ROSE.mean().sort_values().plot.barh();
In [310]:
UNDER.mean().sort_values().plot.barh();
In [311]:
POE1.mean().sort_values().plot.barh();
In [312]:
POE2.mean().sort_values().plot.barh();
In [313]:
POE3.mean().sort_values().plot.barh();
In [314]:
POE4.mean().sort_values().plot.barh();
In [315]:
POE5.mean().sort_values().plot.barh();

What is Trust?

In [316]:
trust_words = pd.Series(SALEX[SALEX['trust'] == 1].index.tolist())
In [317]:
trust_words.sample(10)
Out[317]:
657           praise
395              god
467        indelible
475         innocent
402    grandchildren
227            crisp
363        formative
154           cogent
809        statement
419             hail
dtype: object

Mean Sentiment by Chapter

In [318]:
EIGHT_chaps = EMO_CHAPS.loc[eight] 
JACKJILL_chaps = EMO_CHAPS.loc[jackjill] 
OLD_chaps = EMO_CHAPS.loc[old]
JO_chaps = EMO_CHAPS.loc[jo]
LITTLEM_chaps = EMO_CHAPS.loc[littleM]
LITTLEW_chaps = EMO_CHAPS.loc[littleW]
ROSE_chaps = EMO_CHAPS.loc[rose]
UNDER_chaps = EMO_CHAPS.loc[under]
POE1_chaps = EMO_CHAPS.loc[poe1]
POE2_chaps = EMO_CHAPS.loc[poe2]
POE3_chaps = EMO_CHAPS.loc[poe3]
POE4_chaps = EMO_CHAPS.loc[poe4]
POE5_chaps = EMO_CHAPS.loc[poe5]
In [319]:
EIGHT_chaps.style.background_gradient(cmap='YlGn', high=.25)
Out[319]:
anger anticipation disgust fear joy sadness surprise trust polarity
chap_num
1 0.002218 0.004540 0.001484 0.003062 0.006274 0.003834 0.002254 0.004984 0.003144
2 0.001857 0.004506 0.001574 0.002005 0.005895 0.002520 0.003465 0.004947 0.004326
3 0.001897 0.003304 0.001740 0.002407 0.004911 0.003439 0.001908 0.004843 0.002505
4 0.001169 0.004936 0.001146 0.002664 0.005836 0.003226 0.001989 0.005441 0.003755
5 0.001603 0.004802 0.001289 0.001786 0.006715 0.002772 0.003053 0.006427 0.004784
6 0.001410 0.003965 0.001530 0.002149 0.006129 0.002271 0.002076 0.005548 0.004771
7 0.001307 0.004421 0.001533 0.001956 0.006353 0.002553 0.002243 0.005287 0.005002
8 0.001903 0.004445 0.001755 0.002628 0.005297 0.003283 0.002914 0.005953 0.003931
9 0.001636 0.004147 0.001135 0.002048 0.005737 0.002030 0.002395 0.004421 0.004652
10 0.001551 0.004189 0.000969 0.001662 0.006952 0.002336 0.002322 0.005112 0.005601
11 0.001914 0.004234 0.002004 0.002499 0.004771 0.004071 0.002088 0.005307 0.001408
12 0.001807 0.003917 0.001398 0.001972 0.005106 0.002532 0.001875 0.005184 0.003615
13 0.001452 0.004937 0.001366 0.001420 0.006662 0.002539 0.002015 0.006146 0.006473
14 0.002259 0.003739 0.001730 0.002807 0.004309 0.003009 0.002451 0.004494 0.001795
15 0.002683 0.003410 0.001973 0.002740 0.004223 0.003273 0.002186 0.004021 0.001150
16 0.001189 0.004906 0.000975 0.001206 0.007030 0.001765 0.002434 0.006736 0.007068
17 0.001719 0.004224 0.001475 0.002646 0.005212 0.002327 0.002173 0.006005 0.003629
18 0.001942 0.003076 0.001596 0.002206 0.003953 0.002826 0.001835 0.004597 0.001694
19 0.001603 0.003856 0.001577 0.003045 0.005400 0.002814 0.002238 0.005472 0.003388
20 0.001401 0.003840 0.001335 0.001709 0.006375 0.002328 0.002444 0.005304 0.004857
21 0.002329 0.003440 0.002271 0.003388 0.003266 0.004366 0.001822 0.004168 -0.001413
22 0.001253 0.004491 0.001549 0.001913 0.007078 0.002907 0.002740 0.006393 0.006508
23 0.002047 0.002951 0.001770 0.002678 0.004523 0.002898 0.001582 0.005336 0.002091
24 0.001813 0.003609 0.001233 0.002116 0.005086 0.002331 0.001866 0.005287 0.004287
In [320]:
JACKJILL_chaps.style.background_gradient(cmap='YlGn', high=.25)
Out[320]:
anger anticipation disgust fear joy sadness surprise trust polarity
chap_num
1 0.001793 0.003098 0.001548 0.003142 0.004031 0.003306 0.001774 0.004641 0.001883
2 0.002136 0.004500 0.002011 0.003764 0.005664 0.004873 0.002492 0.005253 0.002350
3 0.001995 0.003372 0.001716 0.002380 0.004607 0.003098 0.002114 0.003975 0.001979
4 0.001551 0.005056 0.001528 0.002388 0.006800 0.003040 0.002172 0.004980 0.004206
5 0.001435 0.004892 0.001685 0.001621 0.006808 0.001647 0.002754 0.006024 0.005665
6 0.001556 0.004274 0.001887 0.001990 0.005831 0.003013 0.002687 0.004382 0.003402
7 0.002273 0.003367 0.001721 0.002673 0.004079 0.003289 0.001747 0.004715 0.001162
8 0.001661 0.003811 0.001556 0.002072 0.004923 0.002559 0.001561 0.003987 0.002300
9 0.001677 0.003164 0.001471 0.002239 0.004627 0.002819 0.002045 0.005261 0.003203
10 0.001525 0.004198 0.001256 0.001685 0.006452 0.002793 0.002565 0.004967 0.004692
11 0.002098 0.003459 0.001673 0.002635 0.003695 0.002918 0.002174 0.003999 0.000797
12 0.002082 0.003538 0.001507 0.002362 0.004691 0.002830 0.002100 0.004156 0.002307
13 0.002334 0.003711 0.001557 0.002699 0.004134 0.003165 0.001733 0.004806 0.001524
14 0.002097 0.003741 0.001220 0.002246 0.005029 0.002365 0.002561 0.005777 0.003459
15 0.001323 0.004431 0.001196 0.001718 0.006693 0.002216 0.002626 0.006279 0.005608
16 0.001719 0.004491 0.001558 0.001606 0.006967 0.002824 0.002781 0.004992 0.004757
17 0.001504 0.004111 0.001619 0.002027 0.006080 0.002823 0.002373 0.005227 0.003725
18 0.001134 0.004201 0.001405 0.001873 0.006954 0.002709 0.002488 0.005224 0.004786
19 0.001900 0.003211 0.001285 0.002258 0.004958 0.002540 0.002023 0.004383 0.002789
20 0.001572 0.004797 0.001190 0.002679 0.006094 0.003375 0.002296 0.006167 0.004340
21 0.001636 0.003868 0.001416 0.001877 0.005589 0.002830 0.002096 0.004434 0.003245
22 0.001795 0.004106 0.001806 0.002313 0.005973 0.003341 0.002766 0.005176 0.004031
23 0.001724 0.003598 0.001667 0.001674 0.005229 0.002576 0.002374 0.004628 0.003664
24 0.001255 0.003908 0.001044 0.001269 0.005394 0.002049 0.002155 0.005630 0.005376
In [321]:
OLD_chaps.style.background_gradient(cmap='YlGn', high=.25)
Out[321]:
anger anticipation disgust fear joy sadness surprise trust polarity
chap_num
1 0.002019 0.004333 0.001820 0.002760 0.005201 0.003030 0.002605 0.004602 0.002061
2 0.002308 0.003560 0.001850 0.002333 0.004168 0.003346 0.002060 0.003974 0.001395
3 0.002285 0.003932 0.001803 0.002519 0.004768 0.003258 0.001873 0.005167 0.002094
4 0.002103 0.003894 0.001705 0.002674 0.005317 0.003370 0.002277 0.004908 0.002476
5 0.002975 0.003015 0.002636 0.002908 0.003972 0.003532 0.001871 0.004262 0.000745
6 0.001952 0.003586 0.001809 0.002469 0.004458 0.002916 0.002014 0.004083 0.001611
7 0.001587 0.003562 0.001582 0.002506 0.005612 0.003146 0.002041 0.004299 0.002788
8 0.001483 0.003932 0.001619 0.002230 0.005600 0.003119 0.002221 0.005099 0.003772
9 0.001931 0.003886 0.001728 0.002572 0.005390 0.004147 0.002012 0.005199 0.002765
10 0.001782 0.003748 0.001854 0.002521 0.005168 0.002987 0.002164 0.004739 0.003031
11 0.001772 0.003217 0.001565 0.002056 0.004640 0.002767 0.001744 0.004184 0.002719
12 0.001934 0.003564 0.001490 0.002042 0.005003 0.002539 0.002082 0.004681 0.003247
13 0.001824 0.004864 0.001557 0.002002 0.006079 0.002832 0.002649 0.004470 0.003765
14 0.001775 0.004058 0.001842 0.002442 0.005360 0.003190 0.001903 0.004965 0.002750
15 0.002207 0.003271 0.001556 0.002777 0.004233 0.003577 0.001657 0.003686 0.000396
16 0.002339 0.003886 0.001616 0.002234 0.004992 0.003206 0.002122 0.004717 0.002319
17 0.002030 0.003634 0.001464 0.002572 0.004095 0.003225 0.002049 0.004629 0.002040
18 0.001869 0.003910 0.001770 0.002913 0.005627 0.002773 0.002258 0.004812 0.002795
19 0.001533 0.003708 0.001598 0.002243 0.005413 0.002807 0.002265 0.005518 0.003532
In [322]:
JO_chaps.style.background_gradient(cmap='YlGn', high=.25)
Out[322]:
anger anticipation disgust fear joy sadness surprise trust polarity
chap_num
23 0.001711 0.003629 0.001553 0.002066 0.005673 0.002821 0.002080 0.005062 0.003484
24 0.001716 0.003998 0.001454 0.002393 0.006021 0.002311 0.002060 0.005355 0.004496
25 0.001746 0.003985 0.001358 0.002538 0.004789 0.002745 0.002161 0.004625 0.002666
26 0.001357 0.003812 0.001049 0.001938 0.005070 0.002381 0.002425 0.004762 0.003803
27 0.001966 0.003334 0.001484 0.002175 0.004628 0.002569 0.001737 0.004079 0.002096
28 0.001792 0.003518 0.001669 0.002832 0.004641 0.002907 0.001924 0.005304 0.002986
29 0.002356 0.003099 0.001745 0.003464 0.003780 0.003648 0.001964 0.004219 0.000951
30 0.001594 0.003945 0.001309 0.002142 0.005277 0.002834 0.002248 0.004574 0.003576
31 0.001755 0.003664 0.001267 0.002571 0.004950 0.002695 0.002318 0.004897 0.002940
32 0.002168 0.004492 0.001893 0.002473 0.005806 0.002744 0.002178 0.005159 0.003183
33 0.001980 0.003528 0.001819 0.003148 0.004346 0.004114 0.002493 0.004555 0.000999
34 0.002260 0.003058 0.002059 0.003084 0.003712 0.003160 0.001956 0.003780 0.000542
35 0.001778 0.003292 0.001702 0.002715 0.004674 0.002987 0.001906 0.004102 0.002241
36 0.001648 0.003179 0.001363 0.002412 0.004875 0.002898 0.001887 0.004423 0.002623
37 0.002054 0.003800 0.001320 0.002814 0.005712 0.003448 0.001921 0.005104 0.002983
38 0.002158 0.003339 0.001573 0.002582 0.004315 0.002750 0.002172 0.003937 0.000868
39 0.001692 0.004344 0.001334 0.001913 0.005428 0.002669 0.002426 0.005453 0.004063
40 0.001936 0.004435 0.001569 0.002409 0.006121 0.002651 0.002114 0.005239 0.004244
41 0.001336 0.003901 0.001450 0.002356 0.005833 0.002517 0.001917 0.005492 0.003471
42 0.001774 0.003641 0.001663 0.002849 0.004891 0.003550 0.002057 0.004204 0.001305
43 0.002138 0.004327 0.001552 0.003074 0.005200 0.003669 0.002472 0.004665 0.002564
44 0.002252 0.004131 0.001340 0.003162 0.005379 0.003570 0.002292 0.004549 0.002709
In [323]:
LITTLEM_chaps.style.background_gradient(cmap='YlGn', high=.25)
Out[323]:
anger anticipation disgust fear joy sadness surprise trust polarity
chap_num
22 0.001785 0.003750 0.001787 0.002236 0.005582 0.002740 0.002084 0.005468 0.004154
23 0.002356 0.004103 0.002114 0.002144 0.006150 0.002857 0.002435 0.005605 0.004182
24 0.001761 0.004529 0.001863 0.001663 0.006232 0.002777 0.002075 0.006388 0.005421
25 0.002469 0.003776 0.001694 0.002690 0.005523 0.003226 0.001929 0.005328 0.003356
26 0.001614 0.003995 0.001388 0.001859 0.005453 0.002806 0.002331 0.003904 0.003043
27 0.002439 0.003094 0.001652 0.002928 0.003759 0.003048 0.001786 0.003501 0.000319
28 0.002013 0.003729 0.001581 0.001976 0.005615 0.003668 0.001817 0.004648 0.002764
29 0.002069 0.003301 0.001672 0.002546 0.005001 0.003122 0.002194 0.003922 0.002122
30 0.002039 0.003404 0.001828 0.002421 0.004544 0.002933 0.002504 0.004006 0.001399
31 0.002119 0.003720 0.001908 0.002745 0.004934 0.003295 0.002135 0.004356 0.002103
32 0.002040 0.003910 0.001693 0.002290 0.005137 0.002189 0.002139 0.004326 0.003673
33 0.002157 0.003305 0.001678 0.002549 0.004532 0.002905 0.001950 0.003680 0.001372
34 0.001634 0.003516 0.001947 0.001825 0.005785 0.002827 0.002078 0.005221 0.003687
35 0.002629 0.002680 0.001933 0.002863 0.003304 0.003582 0.001653 0.003959 0.000297
36 0.001513 0.003794 0.001426 0.001936 0.005032 0.002308 0.002040 0.005612 0.004019
37 0.002288 0.002880 0.001789 0.002825 0.004383 0.002626 0.002053 0.003920 0.001238
38 0.001798 0.004185 0.001661 0.001980 0.006185 0.002649 0.002544 0.005207 0.004244
39 0.002939 0.003712 0.001797 0.002720 0.005412 0.003182 0.002344 0.004110 0.001847
40 0.002071 0.003683 0.002083 0.001927 0.005278 0.003588 0.001870 0.005416 0.003501
41 0.002872 0.003411 0.001898 0.002803 0.004575 0.003542 0.002123 0.004352 0.001253
42 0.001610 0.004117 0.001212 0.002317 0.006000 0.002830 0.002185 0.005162 0.004432
In [324]:
LITTLEW_chaps.style.background_gradient(cmap='YlGn', high=.25)
Out[324]:
anger anticipation disgust fear joy sadness surprise trust polarity
chap_num
1 0.002052 0.004010 0.001842 0.002318 0.005238 0.003585 0.002448 0.004331 0.002100
2 0.001776 0.003493 0.001483 0.002046 0.005644 0.002693 0.002659 0.004547 0.003660
3 0.001800 0.004667 0.001469 0.002537 0.006348 0.003205 0.002148 0.004726 0.003391
4 0.001973 0.003821 0.001636 0.002361 0.004632 0.003131 0.002273 0.004230 0.002058
5 0.001650 0.004220 0.002032 0.002279 0.006232 0.002956 0.002502 0.005562 0.003870
6 0.000982 0.004549 0.001082 0.002181 0.007809 0.002587 0.002988 0.005677 0.005554
7 0.002452 0.003284 0.002122 0.002751 0.004118 0.004315 0.001663 0.003842 0.000161
8 0.002709 0.003708 0.001602 0.003192 0.004442 0.003729 0.002076 0.004001 0.000252
9 0.001779 0.004138 0.001609 0.002076 0.005712 0.002866 0.001917 0.004763 0.002859
10 0.001418 0.003847 0.000874 0.001713 0.005573 0.002275 0.002343 0.005104 0.004079
11 0.001891 0.003679 0.001705 0.002106 0.005079 0.003160 0.002010 0.004042 0.001797
12 0.001988 0.003286 0.001776 0.002166 0.004791 0.002892 0.002145 0.004554 0.002576
13 0.001967 0.004359 0.001504 0.001937 0.006608 0.002617 0.002350 0.005404 0.004359
14 0.002228 0.004812 0.001838 0.002128 0.006566 0.002118 0.003234 0.005225 0.004485
15 0.002188 0.004199 0.001674 0.002536 0.005234 0.003718 0.002517 0.004935 0.001647
16 0.001674 0.004190 0.001407 0.002411 0.006027 0.003191 0.002104 0.005736 0.004183
17 0.002062 0.004028 0.002171 0.003645 0.003928 0.004915 0.001920 0.004517 -0.000418
18 0.002007 0.004173 0.001206 0.003007 0.005261 0.004329 0.002334 0.004976 0.001468
19 0.001799 0.003741 0.001494 0.002345 0.005568 0.003326 0.002198 0.004464 0.003070
20 0.001854 0.004408 0.001627 0.002018 0.006083 0.002836 0.001995 0.005205 0.003432
21 0.002394 0.003444 0.001923 0.003033 0.003756 0.003011 0.002050 0.003941 0.000181
22 0.001209 0.004786 0.001257 0.001498 0.007109 0.003047 0.002529 0.005054 0.004750
23 0.001546 0.004350 0.001629 0.002712 0.005536 0.002524 0.002708 0.005388 0.002995
24 0.001427 0.003717 0.001649 0.002259 0.005025 0.002576 0.002276 0.004938 0.002704
25 0.001326 0.003847 0.001048 0.002082 0.006085 0.002729 0.002182 0.005708 0.004720
26 0.001900 0.003772 0.000768 0.002118 0.004783 0.002462 0.002277 0.003790 0.002071
In [325]:
ROSE_chaps.style.background_gradient(cmap='YlGn', high=.25)
Out[325]:
anger anticipation disgust fear joy sadness surprise trust polarity
chap_num
23 0.001609 0.003660 0.001236 0.002305 0.005043 0.002370 0.001954 0.004940 0.003559
24 0.001425 0.004351 0.001625 0.002022 0.005578 0.002525 0.002311 0.005487 0.004037
25 0.001402 0.003752 0.001074 0.002127 0.005203 0.002545 0.002153 0.005468 0.003955
26 0.001662 0.004195 0.001591 0.002177 0.005248 0.002305 0.001908 0.005493 0.003854
27 0.001895 0.003833 0.001689 0.002083 0.004454 0.002666 0.002131 0.005202 0.003150
28 0.002000 0.003267 0.001573 0.002402 0.004061 0.003286 0.001835 0.004129 0.001605
29 0.001352 0.003378 0.001359 0.001965 0.005141 0.002507 0.002147 0.004419 0.003433
30 0.001730 0.003685 0.001645 0.002391 0.004637 0.002935 0.001753 0.004979 0.003192
31 0.001780 0.003626 0.001460 0.002412 0.004425 0.003060 0.001865 0.004883 0.002682
32 0.002553 0.003955 0.001511 0.003158 0.005027 0.003279 0.002102 0.005489 0.002702
33 0.001554 0.004090 0.001403 0.002253 0.005636 0.002911 0.002116 0.005364 0.003876
34 0.001690 0.003583 0.001409 0.002306 0.004720 0.002682 0.001946 0.004542 0.003165
35 0.001784 0.003729 0.001829 0.002146 0.005086 0.002592 0.002075 0.005526 0.003610
36 0.001960 0.004136 0.001869 0.002618 0.005566 0.002823 0.001825 0.005561 0.002617
37 0.002109 0.003172 0.001570 0.003495 0.003360 0.004140 0.001753 0.004614 -0.000605
38 0.002296 0.003639 0.001932 0.002444 0.005113 0.003207 0.002039 0.004680 0.002460
39 0.001638 0.004109 0.001571 0.002135 0.005512 0.002635 0.002359 0.005273 0.003764
40 0.001420 0.003727 0.001078 0.002186 0.005115 0.002679 0.002431 0.005137 0.003963
41 0.000929 0.004736 0.001014 0.001569 0.006064 0.002568 0.002164 0.006271 0.005930
42 0.001268 0.004454 0.001328 0.001808 0.006361 0.002807 0.002030 0.006014 0.005567
43 0.001835 0.003452 0.001559 0.002525 0.005189 0.002955 0.002071 0.004885 0.002908
44 0.001237 0.003915 0.000799 0.001604 0.005566 0.002238 0.001692 0.004857 0.004586
In [326]:
UNDER_chaps.style.background_gradient(cmap='YlGn', high=.25)
Out[326]:
anger anticipation disgust fear joy sadness surprise trust polarity
chap_num
1 0.002549 0.003411 0.001843 0.002711 0.004791 0.003506 0.002684 0.003989 0.000824
2 0.001793 0.003777 0.001297 0.002110 0.004738 0.003673 0.002419 0.004055 0.001927
3 0.002085 0.003429 0.001932 0.002826 0.005213 0.003500 0.001680 0.004076 0.001397
4 0.002407 0.003201 0.001971 0.002944 0.005131 0.002912 0.002636 0.003701 0.000866
5 0.001834 0.003775 0.002256 0.002640 0.005407 0.003162 0.002346 0.005033 0.002706
6 0.001181 0.004377 0.001346 0.001833 0.006532 0.002028 0.002331 0.006214 0.005582
7 0.001725 0.004087 0.001815 0.002544 0.006032 0.002877 0.003189 0.004218 0.002466
8 0.001874 0.005514 0.001895 0.003013 0.007739 0.002827 0.002836 0.006139 0.003768
9 0.001541 0.004073 0.001475 0.002695 0.006097 0.002523 0.002821 0.004585 0.002720
10 0.001626 0.003483 0.001842 0.002841 0.005638 0.003646 0.001955 0.004739 0.001700
11 0.001848 0.003506 0.001620 0.002880 0.005513 0.003190 0.001978 0.004723 0.003404
12 0.002026 0.003741 0.001569 0.002374 0.005405 0.002449 0.002516 0.004764 0.003607
13 0.002283 0.004214 0.001735 0.002904 0.004515 0.002886 0.002277 0.004066 0.000770
14 0.002335 0.003083 0.001739 0.002965 0.004040 0.003277 0.001998 0.003329 0.000226
15 0.002205 0.003178 0.001763 0.003835 0.004419 0.003586 0.001975 0.003186 -0.000245
16 0.002425 0.003254 0.001949 0.003223 0.003783 0.003416 0.001871 0.003570 0.000136
17 0.001985 0.003551 0.001333 0.002892 0.004604 0.002393 0.002198 0.004236 0.001973
18 0.001832 0.003106 0.001869 0.002669 0.004908 0.002518 0.001819 0.004344 0.001873
19 0.001917 0.003753 0.001735 0.002805 0.004928 0.002617 0.002633 0.004242 0.001597
20 0.001867 0.004530 0.001365 0.002210 0.005259 0.001976 0.002502 0.005107 0.002948
21 0.001794 0.004255 0.001628 0.002406 0.005464 0.002693 0.001979 0.004872 0.002501
22 0.001815 0.003719 0.001652 0.002582 0.004581 0.002464 0.002267 0.003927 0.001139
23 0.001584 0.003431 0.001447 0.002445 0.004878 0.002764 0.001788 0.004236 0.001508
24 0.001429 0.003329 0.001414 0.002441 0.004153 0.002344 0.001435 0.004432 0.002144
In [327]:
POE1_chaps.style.background_gradient(cmap='YlGn', high=.25)
Out[327]:
anger anticipation disgust fear joy sadness surprise trust polarity
chap_num
1 0.001730 0.002349 0.001298 0.002443 0.002332 0.002380 0.001464 0.003069 0.000164
2 0.001763 0.002934 0.001305 0.002483 0.002925 0.002309 0.001873 0.003522 0.001008
3 0.001839 0.002918 0.001342 0.002634 0.003720 0.002219 0.001589 0.004329 0.001976
4 0.001839 0.002068 0.001201 0.002619 0.002164 0.002357 0.001524 0.003019 0.000168
5 0.001741 0.002245 0.001140 0.002318 0.002318 0.002366 0.001316 0.003051 0.000477
6 0.001590 0.003068 0.000820 0.002482 0.003149 0.002126 0.001517 0.003783 0.001812
7 0.002317 0.002207 0.001517 0.003564 0.002163 0.003376 0.001531 0.002662 -0.001412
8 0.001619 0.002624 0.001497 0.001964 0.005094 0.003226 0.001986 0.003229 0.001505
In [328]:
POE2_chaps.style.background_gradient(cmap='YlGn', high=.25)
Out[328]:
anger anticipation disgust fear joy sadness surprise trust polarity
chap_num
1 0.001388 0.002631 0.001088 0.002046 0.002484 0.002055 0.001552 0.003861 0.001597
2 0.002267 0.002486 0.001931 0.003169 0.003162 0.003161 0.001604 0.003646 -0.000070
3 0.002452 0.002592 0.001714 0.003675 0.002274 0.003192 0.001818 0.002919 -0.001598
4 0.001483 0.002492 0.001204 0.002095 0.002214 0.002108 0.001336 0.004870 0.002183
5 0.001629 0.002746 0.001210 0.002903 0.002999 0.002738 0.001808 0.003624 0.000699
6 0.001686 0.002896 0.001435 0.003017 0.002694 0.003077 0.002093 0.003693 0.000121
7 0.002642 0.002188 0.001949 0.003538 0.002438 0.003559 0.001455 0.002584 -0.001417
8 0.002090 0.002141 0.001416 0.002706 0.002540 0.003079 0.001729 0.002636 -0.000483
9 0.003100 0.001197 0.002210 0.003699 0.001192 0.004640 0.001920 0.001011 -0.003636
10 0.002185 0.001567 0.001952 0.003267 0.002718 0.003863 0.001794 0.002026 -0.001455
11 0.001583 0.002524 0.001174 0.002613 0.003900 0.003127 0.001760 0.003685 0.000504
12 0.002522 0.002537 0.001712 0.003429 0.001849 0.003565 0.001023 0.002946 -0.002033
13 0.001430 0.003045 0.001412 0.002107 0.004087 0.002830 0.001389 0.003572 0.001982
14 0.001686 0.002782 0.001018 0.002588 0.003662 0.002918 0.002193 0.003112 0.000949
15 0.002360 0.002054 0.001357 0.003452 0.001714 0.002984 0.001424 0.002120 -0.001857
16 0.001644 0.002244 0.001365 0.002850 0.002075 0.002918 0.001308 0.002450 -0.000723
17 0.001382 0.002690 0.000768 0.001793 0.003760 0.001845 0.001642 0.003700 0.002387
18 0.001282 0.003333 0.001298 0.001375 0.004990 0.002427 0.002443 0.004411 0.003500
19 0.001958 0.001911 0.001321 0.002425 0.002275 0.002421 0.001306 0.002646 0.000112
20 0.003535 0.002295 0.002355 0.005030 0.002169 0.004819 0.001562 0.002166 -0.004007
21 0.002560 0.002213 0.001570 0.003697 0.002841 0.003883 0.001974 0.002917 -0.001081
22 0.001743 0.003543 0.001379 0.002482 0.004521 0.002648 0.001645 0.004014 0.001896
In [329]:
POE3_chaps.style.background_gradient(cmap='YlGn', high=.25)
Out[329]:
anger anticipation disgust fear joy sadness surprise trust polarity
chap_num
1 0.001852 0.002303 0.001319 0.002805 0.002827 0.003004 0.001415 0.002615 -0.000300
2 0.001975 0.002989 0.001447 0.003145 0.004757 0.004224 0.002329 0.003130 0.000001
3 0.001914 0.002547 0.001507 0.002801 0.002530 0.002885 0.001959 0.003094 -0.000297
4 0.001509 0.002628 0.001037 0.001854 0.003519 0.002299 0.001687 0.003985 0.002253
5 0.002038 0.002396 0.001962 0.002830 0.002509 0.002935 0.001661 0.003364 -0.000038
6 0.001547 0.002564 0.001689 0.002270 0.003754 0.001992 0.001989 0.004907 0.001555
In [330]:
POE4_chaps.style.background_gradient(cmap='YlGn', high=.25)
Out[330]:
anger anticipation disgust fear joy sadness surprise trust polarity
chap_num
1 0.002208 0.003418 0.001410 0.003039 0.003556 0.002442 0.001314 0.005215 0.002064
2 0.000865 0.003475 0.001212 0.002392 0.005598 0.002030 0.001609 0.007227 0.005574
3 0.001727 0.002297 0.002106 0.002446 0.002505 0.002571 0.001227 0.003849 -0.000156
4 0.001982 0.002217 0.001241 0.002844 0.002337 0.002499 0.001560 0.002583 -0.000344
5 0.001518 0.002305 0.001212 0.002454 0.003084 0.002566 0.001415 0.004010 0.001432
6 0.001330 0.002634 0.001068 0.001784 0.003284 0.002368 0.001607 0.004015 0.001981
7 0.001639 0.003248 0.001035 0.001972 0.003389 0.001986 0.001742 0.005709 0.003164
8 0.001819 0.002460 0.001420 0.002429 0.003257 0.002238 0.001543 0.004152 0.001907
9 0.001762 0.002941 0.001280 0.002121 0.003980 0.002283 0.002422 0.004358 0.003011
10 0.001358 0.003343 0.001153 0.001621 0.003730 0.001576 0.001632 0.005172 0.002519
11 0.001521 0.002147 0.001226 0.002195 0.002406 0.002210 0.001243 0.003279 0.000617
12 0.001584 0.002918 0.001313 0.002048 0.003590 0.002945 0.001990 0.003947 0.001238
13 0.001648 0.002436 0.001796 0.002723 0.002200 0.002976 0.001408 0.003305 0.000147
14 0.001454 0.003406 0.001236 0.002336 0.003505 0.002277 0.001970 0.004709 0.002112
15 0.001620 0.003092 0.001566 0.001910 0.003646 0.002304 0.001573 0.004992 0.002219
16 0.001584 0.002868 0.000861 0.001458 0.003888 0.001534 0.001735 0.003855 0.002920
17 0.001224 0.002298 0.001046 0.001766 0.002186 0.002032 0.001393 0.003679 0.001060
18 0.001856 0.002596 0.001302 0.002624 0.003173 0.002814 0.001481 0.003273 0.000777
19 0.001980 0.002003 0.001909 0.003483 0.002360 0.003052 0.001440 0.003090 -0.001007
In [331]:
POE5_chaps.style.background_gradient(cmap='YlGn', high=.25)
Out[331]:
anger anticipation disgust fear joy sadness surprise trust polarity
chap_num
1 0.001793 0.002454 0.001667 0.001775 0.003737 0.002615 0.001428 0.003914 0.002327
2 0.001835 0.003057 0.001478 0.002568 0.003499 0.002888 0.002000 0.004059 0.001609
3 0.001870 0.002206 0.001632 0.003402 0.002952 0.003448 0.001663 0.002932 -0.000437
4 0.001588 0.002672 0.001020 0.002272 0.003879 0.002332 0.002117 0.003444 0.001659
5 0.002313 0.002639 0.001337 0.003201 0.003107 0.002827 0.002053 0.002695 -0.000078
6 0.002083 0.002543 0.001783 0.002507 0.002525 0.002946 0.001216 0.003876 0.000195
7 0.001999 0.002211 0.001417 0.002223 0.002475 0.002614 0.001735 0.004142 0.001043
8 0.002384 0.003240 0.001089 0.002932 0.005959 0.003410 0.002626 0.005965 0.003344
9 0.001637 0.002653 0.001472 0.001957 0.002769 0.002323 0.001608 0.004346 0.002026
10 0.001352 0.002556 0.000923 0.002080 0.003154 0.002255 0.001782 0.004080 0.001826

Create Stacked Dataframes (for Plotly Express)

In [332]:
EIGHT_chaps_thin = EMO_CHAPS.stack().to_frame().reset_index().rename(columns={0:'value','level_2':'emo'}).query("book_id == {}".format(eight))
JACKJILL_chaps_thin= EMO_CHAPS.stack().to_frame().reset_index().rename(columns={0:'value','level_2':'emo'}).query("book_id == {}".format(jackjill))
OLD_chaps_thin = EMO_CHAPS.stack().to_frame().reset_index().rename(columns={0:'value','level_2':'emo'}).query("book_id == {}".format(old))
JO_chaps_thin = EMO_CHAPS.stack().to_frame().reset_index().rename(columns={0:'value','level_2':'emo'}).query("book_id == {}".format(jo))
LITTLEM_chaps_thin = EMO_CHAPS.stack().to_frame().reset_index().rename(columns={0:'value','level_2':'emo'}).query("book_id == {}".format(littleM))
LITTLEW_chaps_thin = EMO_CHAPS.stack().to_frame().reset_index().rename(columns={0:'value','level_2':'emo'}).query("book_id == {}".format(littleW))
ROSE_chaps_thin = EMO_CHAPS.stack().to_frame().reset_index().rename(columns={0:'value','level_2':'emo'}).query("book_id == {}".format(rose))
UNDER_chaps_thin = EMO_CHAPS.stack().to_frame().reset_index().rename(columns={0:'value','level_2':'emo'}).query("book_id == {}".format(under))
POE1_chaps_thin = EMO_CHAPS.stack().to_frame().reset_index().rename(columns={0:'value','level_2':'emo'}).query("book_id == {}".format(poe1))
POE2_chaps_thin = EMO_CHAPS.stack().to_frame().reset_index().rename(columns={0:'value','level_2':'emo'}).query("book_id == {}".format(poe2))
POE3_chaps_thin = EMO_CHAPS.stack().to_frame().reset_index().rename(columns={0:'value','level_2':'emo'}).query("book_id == {}".format(poe3))
POE4_chaps_thin = EMO_CHAPS.stack().to_frame().reset_index().rename(columns={0:'value','level_2':'emo'}).query("book_id == {}".format(poe4))
POE5_chaps_thin = EMO_CHAPS.stack().to_frame().reset_index().rename(columns={0:'value','level_2':'emo'}).query("book_id == {}".format(poe5))
In [333]:
def plot_sentiments(df, emo='polarity'):
    FIG = dict(figsize=(25, 5), legend=True, fontsize=14, rot=45)
    df[emo].plot(**FIG)
In [334]:
# plot_sentiments(PERSUASION_chaps, emo_cols)
px.line(EIGHT_chaps_thin, x='chap_num', y='value', color='emo')
In [408]:
# plot_sentiments(MOBYDICK_chaps, emo_cols)
px.line(JACKJILL_chaps_thin, x='chap_num', y='value', color='emo')
In [335]:
px.line(OLD_chaps_thin, x='chap_num', y='value', color='emo')
In [336]:
px.line(JO_chaps_thin, x='chap_num', y='value', color='emo')
In [337]:
px.line(LITTLEM_chaps_thin, x='chap_num', y='value', color='emo')
In [338]:
px.line(LITTLEW_chaps_thin, x='chap_num', y='value', color='emo')
In [339]:
px.line(ROSE_chaps_thin, x='chap_num', y='value', color='emo')
In [340]:
px.line(UNDER_chaps_thin, x='chap_num', y='value', color='emo')
In [341]:
px.line(POE1_chaps_thin, x='chap_num', y='value', color='emo')
In [342]:
px.line(POE2_chaps_thin, x='chap_num', y='value', color='emo')
In [343]:
px.line(POE3_chaps_thin, x='chap_num', y='value', color='emo')
In [344]:
px.line(POE4_chaps_thin, x='chap_num', y='value', color='emo')
In [345]:
px.line(POE5_chaps_thin, x='chap_num', y='value', color='emo')

Cose Read Sentiment in Texts

Combine VOCAB + SALEX with TOKENS

We need to do this to reconstruct the sentences, which are lost in the BOW representation.

In [346]:
TOKENSIDX = TOKENS.index
T = TOKENS.merge(V, on='term_str', how='left')
T.index = TOKENSIDX
T = T.fillna(0)
In [347]:
EIGHT2 = T.loc[eight].copy()
JACKJILL2 = T.loc[jackjill].copy()
OLD2 = T.loc[old].copy()
JO2 = T.loc[jo].copy()
LITTLEM2 = T.loc[littleM].copy()
LITTLEW2 = T.loc[littleW].copy()
ROSE2 = T.loc[rose].copy()
UNDER2 = T.loc[under].copy()
POE12 = T.loc[poe1].copy()
POE22 = T.loc[poe2].copy() 
POE32 = T.loc[poe3].copy()
POE42 = T.loc[poe4].copy()
POE52 = T.loc[poe5].copy()
In [348]:
emo = 'polarity'
In [349]:
EIGHT2['html'] = EIGHT2.apply(lambda x: "<span class='sent{}'>{}</span>".format(int(np.sign(x[emo])), x.term_str), 1)
JACKJILL2['html'] =  JACKJILL2.apply(lambda x: "<span class='sent{}'>{}</span>".format(int(np.sign(x[emo])), x.term_str), 1)
OLD2['html'] =  OLD2.apply(lambda x: "<span class='sent{}'>{}</span>".format(int(np.sign(x[emo])), x.term_str), 1)
JO2['html'] =  JO2.apply(lambda x: "<span class='sent{}'>{}</span>".format(int(np.sign(x[emo])), x.term_str), 1)
LITTLEM2['html'] =  LITTLEM2.apply(lambda x: "<span class='sent{}'>{}</span>".format(int(np.sign(x[emo])), x.term_str), 1)
LITTLEW2['html'] =  LITTLEW2.apply(lambda x: "<span class='sent{}'>{}</span>".format(int(np.sign(x[emo])), x.term_str), 1)
ROSE2['html'] =  ROSE2.apply(lambda x: "<span class='sent{}'>{}</span>".format(int(np.sign(x[emo])), x.term_str), 1)
UNDER2['html'] =  UNDER2.apply(lambda x: "<span class='sent{}'>{}</span>".format(int(np.sign(x[emo])), x.term_str), 1)
POE12['html'] =  POE12.apply(lambda x: "<span class='sent{}'>{}</span>".format(int(np.sign(x[emo])), x.term_str), 1)
POE22['html'] =  POE22.apply(lambda x: "<span class='sent{}'>{}</span>".format(int(np.sign(x[emo])), x.term_str), 1)
POE32['html'] =  POE32.apply(lambda x: "<span class='sent{}'>{}</span>".format(int(np.sign(x[emo])), x.term_str), 1)
POE42['html'] =  POE42.apply(lambda x: "<span class='sent{}'>{}</span>".format(int(np.sign(x[emo])), x.term_str), 1)
POE52['html'] =  POE52.apply(lambda x: "<span class='sent{}'>{}</span>".format(int(np.sign(x[emo])), x.term_str), 1)
In [350]:
EIGHT2['html'].sample(10)
Out[350]:
chap_num  para_num  sent_num  token_num
4         58        0         10             <span class='sent0'>declare</span>
21        20        1         15                 <span class='sent0'>and</span>
17        83        1         7                <span class='sent0'>tarts</span>
          62        3         10                  <span class='sent0'>to</span>
18        26        1         23                  <span class='sent0'>of</span>
7         77        2         25                   <span class='sent0'>a</span>
4         39        0         4                  <span class='sent0'>the</span>
2         2         1         2                <span class='sent0'>could</span>
24        96        3         8            <span class='sent0'>gutenberg</span>
9         93        0         9                  <span class='sent0'>who</span>
Name: html, dtype: object
In [351]:
EIGHT2
Out[351]:
pos_tuple pos token_str term_str term_id term_rank n num stop stem_porter ... disgust fear joy negative positive sadness surprise trust polarity html
chap_num para_num sent_num token_num
1 0 0 0 ('Rose', 'NNP') NNP Rose rose 22269 0.0 0.0 0.0 0.0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 <span class='sent0'>rose</span>
1 ('sat', 'VBD') VBD sat sat 22651 0.0 0.0 0.0 0.0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 <span class='sent0'>sat</span>
2 ('all', 'DT') DT all all 889 0.0 0.0 0.0 0.0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 <span class='sent0'>all</span>
3 ('alone', 'RB') RB alone alone 951 0.0 0.0 0.0 0.0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 <span class='sent0'>alone</span>
4 ('in', 'IN') IN in in 13335 0.0 0.0 0.0 0.0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 <span class='sent0'>in</span>
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
24 129 0 40 ('to', 'TO') TO to to 26510 0.0 0.0 0.0 0.0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 <span class='sent0'>to</span>
41 ('hear', 'VB') VB hear hear 12264 0.0 0.0 0.0 0.0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 <span class='sent0'>hear</span>
42 ('about', 'IN') IN about about 283 0.0 0.0 0.0 0.0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 <span class='sent0'>about</span>
43 ('new', 'JJ') JJ new new 17347 0.0 0.0 0.0 0.0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 <span class='sent0'>new</span>
44 ('eBooks', 'NNS') NNS eBooks ebooks 8403 0.0 0.0 0.0 0.0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 <span class='sent0'>ebooks</span>

77648 rows × 35 columns

In [357]:
EIGHT2_sents = EIGHT2.groupby(SENTS[1:])[emo_cols].mean()
JACKJILL2_sents = JACKJILL2.groupby(SENTS[1:])[emo_cols].mean()
OLD2_sents = OLD2.groupby(SENTS[1:])[emo_cols].mean()
JO2_sents = JO2.groupby(SENTS[1:])[emo_cols].mean()
LITTLEM2_sents = LITTLEM2.groupby(SENTS[1:])[emo_cols].mean()
LITTLEW2_sents = LITTLEW2.groupby(SENTS[1:])[emo_cols].mean()
ROSE2_sents = ROSE2.groupby(SENTS[1:])[emo_cols].mean()
UNDER2_sents = UNDER2.groupby(SENTS[1:])[emo_cols].mean()
POE12_sents = POE12.groupby(SENTS[1:])[emo_cols].mean()
POE22_sents = POE22.groupby(SENTS[1:])[emo_cols].mean()
POE32_sents = POE32.groupby(SENTS[1:])[emo_cols].mean()
POE42_sents = POE42.groupby(SENTS[1:])[emo_cols].mean()
POE52_sents = POE52.groupby(SENTS[1:])[emo_cols].mean()
In [358]:
EIGHT2_sents['sent_str'] = EIGHT2.groupby(SENTS[1:]).term_str.apply(lambda x: x.str.cat(sep=' '))
EIGHT2['html_str'] = EIGHT2.groupby(SENTS[1:]).html.apply(lambda x: x.str.cat(sep=' '))
In [359]:
JACKJILL2_sents['sent_str'] = JACKJILL2.groupby(SENTS[1:]).term_str.apply(lambda x: x.str.cat(sep=' '))
JACKJILL2_sents['html_str'] = JACKJILL2.groupby(SENTS[1:]).html.apply(lambda x: x.str.cat(sep=' '))
In [360]:
OLD2_sents['sent_str'] = OLD2.groupby(SENTS[1:]).term_str.apply(lambda x: x.str.cat(sep=' '))
OLD2_sents['html_str'] = OLD2.groupby(SENTS[1:]).html.apply(lambda x: x.str.cat(sep=' '))
In [361]:
JO2_sents['sent_str'] = JO2.groupby(SENTS[1:]).term_str.apply(lambda x: x.str.cat(sep=' '))
JO2_sents['html_str'] = JO2.groupby(SENTS[1:]).html.apply(lambda x: x.str.cat(sep=' '))
In [362]:
LITTLEM2_sents['sent_str'] = LITTLEM2.groupby(SENTS[1:]).term_str.apply(lambda x: x.str.cat(sep=' '))
LITTLEM2_sents['html_str'] = LITTLEM2.groupby(SENTS[1:]).html.apply(lambda x: x.str.cat(sep=' '))
In [363]:
LITTLEW2_sents['sent_str'] = LITTLEW2.groupby(SENTS[1:]).term_str.apply(lambda x: x.str.cat(sep=' '))
LITTLEW2_sents['html_str'] = LITTLEW2.groupby(SENTS[1:]).html.apply(lambda x: x.str.cat(sep=' '))
In [364]:
ROSE2_sents['sent_str'] = ROSE2.groupby(SENTS[1:]).term_str.apply(lambda x: x.str.cat(sep=' '))
ROSE2_sents['html_str'] = ROSE2.groupby(SENTS[1:]).html.apply(lambda x: x.str.cat(sep=' '))
In [365]:
UNDER2_sents['sent_str'] = UNDER2.groupby(SENTS[1:]).term_str.apply(lambda x: x.str.cat(sep=' '))
UNDER2_sents['html_str'] = UNDER2.groupby(SENTS[1:]).html.apply(lambda x: x.str.cat(sep=' '))
In [366]:
POE12_sents['sent_str'] = POE12.groupby(SENTS[1:]).term_str.apply(lambda x: x.str.cat(sep=' '))
POE12_sents['html_str'] = POE12.groupby(SENTS[1:]).html.apply(lambda x: x.str.cat(sep=' '))
In [367]:
POE22_sents['sent_str'] = POE22.groupby(SENTS[1:]).term_str.apply(lambda x: x.str.cat(sep=' '))
POE22_sents['html_str'] = POE22.groupby(SENTS[1:]).html.apply(lambda x: x.str.cat(sep=' '))
In [368]:
POE32_sents['sent_str'] = POE32.groupby(SENTS[1:]).term_str.apply(lambda x: x.str.cat(sep=' '))
POE32_sents['html_str'] = POE32.groupby(SENTS[1:]).html.apply(lambda x: x.str.cat(sep=' '))
In [369]:
POE42_sents['sent_str'] = POE42.groupby(SENTS[1:]).term_str.apply(lambda x: x.str.cat(sep=' '))
POE42_sents['html_str'] = POE42.groupby(SENTS[1:]).html.apply(lambda x: x.str.cat(sep=' '))
In [370]:
POE52_sents['sent_str'] = POE52.groupby(SENTS[1:]).term_str.apply(lambda x: x.str.cat(sep=' '))
POE52_sents['html_str'] = POE52.groupby(SENTS[1:]).html.apply(lambda x: x.str.cat(sep=' '))
In [371]:
def sample_sentences(df):
    rows = []
    for idx in df.sample(10).index:

        valence = round(df.loc[idx, emo], 4)     
        t = 0
        if valence > t: color = '#ccffcc'
        elif valence < t: color = '#ffcccc'
        else: color = '#f2f2f2'
        z=0
        rows.append("""<tr style="background-color:{0};padding:.5rem 1rem;font-size:110%;">
        <td>{1}</td><td>{3}</td><td width="400" style="text-align:left;">{2}</td>
        </tr>""".format(color, valence, df.loc[idx, 'html_str'], idx))

    display(HTML('<style>#sample1 td{font-size:120%;vertical-align:top;} .sent-1{color:red;font-weight:bold;} .sent1{color:green;font-weight:bold;}</style>'))
    display(HTML('<table id="sample1"><tr><th>Sentiment</th><th>ID</th><th width="600">Sentence</th></tr>'+''.join(rows)+'</table>'))
In [387]:
# sample_sentences(EIGHT2_sents)
In [388]:
# sample_sentences(JACKJILL2_sents)

Try VADER

In [381]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyser = SentimentIntensityAnalyzer()
In [384]:
EIGHT_vader_cols = EIGHT2_sents.sent_str.apply(analyser.polarity_scores).apply(lambda x: pd.Series(x))
EIGHT_vader = pd.concat([EIGHT2_sents, EIGHT_vader_cols], axis=1)
In [386]:
w = int(EIGHT_vader.shape[0] / 5)
EIGHT_vader[['pos','neg']].rolling(w).mean().plot(figsize=(25,5))
EIGHT_vader[['neu']].rolling(w).mean().plot(figsize=(25,5))
EIGHT_vader[['compound']].rolling(w).mean().plot(figsize=(25,5))
Out[386]:
<matplotlib.axes._subplots.AxesSubplot at 0x2477dcb17f0>
In [389]:
JACKJILL_vader_cols = JACKJILL2_sents.sent_str.apply(analyser.polarity_scores).apply(lambda x: pd.Series(x))
JACKJILL_vader = pd.concat([JACKJILL2_sents, JACKJILL_vader_cols], axis=1)
In [390]:
w = int(JACKJILL_vader.shape[0] / 5)
JACKJILL_vader[['pos','neg']].rolling(w).mean().plot(figsize=(25,5))
JACKJILL_vader[['neu']].rolling(w).mean().plot(figsize=(25,5))
JACKJILL_vader[['compound']].rolling(w).mean().plot(figsize=(25,5))
Out[390]:
<matplotlib.axes._subplots.AxesSubplot at 0x2470f09b048>
In [391]:
OLD_vader_cols = OLD2_sents.sent_str.apply(analyser.polarity_scores).apply(lambda x: pd.Series(x))
OLD_vader = pd.concat([OLD2_sents, OLD_vader_cols], axis=1)
w = int(OLD_vader.shape[0] / 5)
OLD_vader[['pos','neg']].rolling(w).mean().plot(figsize=(25,5))
OLD_vader[['neu']].rolling(w).mean().plot(figsize=(25,5))
OLD_vader[['compound']].rolling(w).mean().plot(figsize=(25,5))
Out[391]:
<matplotlib.axes._subplots.AxesSubplot at 0x2477e524898>
In [392]:
JO_vader_cols = JO2_sents.sent_str.apply(analyser.polarity_scores).apply(lambda x: pd.Series(x))
JO_vader = pd.concat([JO2_sents, JO_vader_cols], axis=1)
w = int(JO_vader.shape[0] / 5)
JO_vader[['pos','neg']].rolling(w).mean().plot(figsize=(25,5))
JO_vader[['neu']].rolling(w).mean().plot(figsize=(25,5))
JO_vader[['compound']].rolling(w).mean().plot(figsize=(25,5))
Out[392]:
<matplotlib.axes._subplots.AxesSubplot at 0x2477e155080>
In [394]:
LITTLEM_vader_cols = LITTLEM2_sents.sent_str.apply(analyser.polarity_scores).apply(lambda x: pd.Series(x))
LITTLEM_vader = pd.concat([LITTLEM2_sents, LITTLEM_vader_cols], axis=1)
w = int(LITTLEM_vader.shape[0] / 5)
LITTLEM_vader[['pos','neg']].rolling(w).mean().plot(figsize=(25,5))
LITTLEM_vader[['neu']].rolling(w).mean().plot(figsize=(25,5))
LITTLEM_vader[['compound']].rolling(w).mean().plot(figsize=(25,5))
Out[394]:
<matplotlib.axes._subplots.AxesSubplot at 0x247016acf98>
In [395]:
LITTLEW_vader_cols = LITTLEW2_sents.sent_str.apply(analyser.polarity_scores).apply(lambda x: pd.Series(x))
LITTLEW_vader = pd.concat([LITTLEW2_sents, LITTLEW_vader_cols], axis=1)
w = int(LITTLEW_vader.shape[0] / 5)
LITTLEW_vader[['pos','neg']].rolling(w).mean().plot(figsize=(25,5))
LITTLEW_vader[['neu']].rolling(w).mean().plot(figsize=(25,5))
LITTLEW_vader[['compound']].rolling(w).mean().plot(figsize=(25,5))
Out[395]:
<matplotlib.axes._subplots.AxesSubplot at 0x24704fa0fd0>
In [397]:
ROSE_vader_cols = ROSE2_sents.sent_str.apply(analyser.polarity_scores).apply(lambda x: pd.Series(x))
ROSE_vader = pd.concat([ROSE2_sents, ROSE_vader_cols], axis=1)
w = int(ROSE_vader.shape[0] / 5)
ROSE_vader[['pos','neg']].rolling(w).mean().plot(figsize=(25,5))
ROSE_vader[['neu']].rolling(w).mean().plot(figsize=(25,5))
ROSE_vader[['compound']].rolling(w).mean().plot(figsize=(25,5))
Out[397]:
<matplotlib.axes._subplots.AxesSubplot at 0x2470fa18748>
In [398]:
UNDER_vader_cols = UNDER2_sents.sent_str.apply(analyser.polarity_scores).apply(lambda x: pd.Series(x))
UNDER_vader = pd.concat([UNDER2_sents, UNDER_vader_cols], axis=1)
w = int(UNDER_vader.shape[0] / 5)
UNDER_vader[['pos','neg']].rolling(w).mean().plot(figsize=(25,5))
UNDER_vader[['neu']].rolling(w).mean().plot(figsize=(25,5))
UNDER_vader[['compound']].rolling(w).mean().plot(figsize=(25,5))
Out[398]:
<matplotlib.axes._subplots.AxesSubplot at 0x2477d7847b8>
In [399]:
POE1_vader_cols = POE12_sents.sent_str.apply(analyser.polarity_scores).apply(lambda x: pd.Series(x))
POE1_vader = pd.concat([POE12_sents, POE1_vader_cols], axis=1)
w = int(POE1_vader.shape[0] / 5)
POE1_vader[['pos','neg']].rolling(w).mean().plot(figsize=(25,5))
POE1_vader[['neu']].rolling(w).mean().plot(figsize=(25,5))
POE1_vader[['compound']].rolling(w).mean().plot(figsize=(25,5))
Out[399]:
<matplotlib.axes._subplots.AxesSubplot at 0x2477e5052b0>
In [400]:
POE2_vader_cols = POE22_sents.sent_str.apply(analyser.polarity_scores).apply(lambda x: pd.Series(x))
POE2_vader = pd.concat([POE22_sents, POE2_vader_cols], axis=1)
w = int(POE2_vader.shape[0] / 5)
POE2_vader[['pos','neg']].rolling(w).mean().plot(figsize=(25,5))
POE2_vader[['neu']].rolling(w).mean().plot(figsize=(25,5))
POE2_vader[['compound']].rolling(w).mean().plot(figsize=(25,5))
Out[400]:
<matplotlib.axes._subplots.AxesSubplot at 0x2477dca2ac8>
In [401]:
POE3_vader_cols = POE32_sents.sent_str.apply(analyser.polarity_scores).apply(lambda x: pd.Series(x))
POE3_vader = pd.concat([POE32_sents, POE3_vader_cols], axis=1)
w = int(POE3_vader.shape[0] / 5)
POE3_vader[['pos','neg']].rolling(w).mean().plot(figsize=(25,5))
POE3_vader[['neu']].rolling(w).mean().plot(figsize=(25,5))
POE3_vader[['compound']].rolling(w).mean().plot(figsize=(25,5))
Out[401]:
<matplotlib.axes._subplots.AxesSubplot at 0x2477e6e1358>
In [402]:
POE4_vader_cols = POE42_sents.sent_str.apply(analyser.polarity_scores).apply(lambda x: pd.Series(x))
POE4_vader = pd.concat([POE42_sents, POE4_vader_cols], axis=1)
w = int(POE4_vader.shape[0] / 5)
POE4_vader[['pos','neg']].rolling(w).mean().plot(figsize=(25,5))
POE4_vader[['neu']].rolling(w).mean().plot(figsize=(25,5))
POE4_vader[['compound']].rolling(w).mean().plot(figsize=(25,5))
Out[402]:
<matplotlib.axes._subplots.AxesSubplot at 0x2477e522c50>
In [403]:
POE5_vader_cols = POE52_sents.sent_str.apply(analyser.polarity_scores).apply(lambda x: pd.Series(x))
POE5_vader = pd.concat([POE52_sents, POE5_vader_cols], axis=1)
w = int(POE5_vader.shape[0] / 5)
POE5_vader[['pos','neg']].rolling(w).mean().plot(figsize=(25,5))
POE5_vader[['neu']].rolling(w).mean().plot(figsize=(25,5))
POE5_vader[['compound']].rolling(w).mean().plot(figsize=(25,5))
Out[403]:
<matplotlib.axes._subplots.AxesSubplot at 0x2477d275828>
In [ ]: